diff --git a/Cargo.lock b/Cargo.lock index 11bb1a1770..adfcbd2876 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -529,7 +529,7 @@ dependencies = [ "hashbrown 0.9.1", "log", "miette", - "regalloc", + "regalloc2", "serde", "smallvec", "souper-ir", @@ -1219,6 +1219,15 @@ dependencies = [ "winapi", ] +[[package]] +name = "fxhash" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c31b6d751ae2c7f11320402d34e41349dd1016f8d5d45e48c4312bc8625af50c" +dependencies = [ + "byteorder", +] + [[package]] name = "generic-array" version = "0.14.5" @@ -2393,14 +2402,15 @@ dependencies = [ ] [[package]] -name = "regalloc" -version = "0.0.34" +name = "regalloc2" +version = "0.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "62446b1d3ebf980bdc68837700af1d77b37bc430e524bf95319c6eada2a4cc02" +checksum = "3dd122b168f0046afcde717e002cdf76c9c87f829ae99dd12a02a0dcf7cc68f1" dependencies = [ + "fxhash", "log", - "rustc-hash", "serde", + "slice-group-by", "smallvec", ] @@ -2653,6 +2663,12 @@ dependencies = [ "rand_core 0.6.3", ] +[[package]] +name = "slice-group-by" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "03b634d87b960ab1a38c4fe143b508576f075e7c978bfad18217645ebfdfa2ec" + [[package]] name = "smallvec" version = "1.8.0" diff --git a/cranelift/codegen/Cargo.toml b/cranelift/codegen/Cargo.toml index 602715f83e..51f39a3674 100644 --- a/cranelift/codegen/Cargo.toml +++ b/cranelift/codegen/Cargo.toml @@ -23,7 +23,7 @@ serde = { version = "1.0.94", features = ["derive"], optional = true } bincode = { version = "1.2.1", optional = true } gimli = { version = "0.26.0", default-features = false, features = ["write"], optional = true } smallvec = { version = "1.6.1" } -regalloc = "0.0.34" +regalloc2 = { version = "0.1.1", features = ["checker"] } souper-ir = { version = "2.1.0", optional = true } # It is a goal of the cranelift-codegen crate to have minimal external dependencies. # Please don't add any unless they are essential to the task of creating binary @@ -77,14 +77,10 @@ all-arch = [ # For dependent crates that want to serialize some parts of cranelift enable-serde = [ "serde", - "regalloc/enable-serde", "cranelift-entity/enable-serde", + "regalloc2/enable-serde", ] -# Allow snapshotting regalloc test cases. Useful only to report bad register -# allocation failures, or for regalloc.rs developers. -regalloc-snapshot = ["bincode", "regalloc/enable-serde"] - # Enable support for the Souper harvester. souper-harvest = ["souper-ir", "souper-ir/stringify"] diff --git a/cranelift/codegen/meta/src/shared/settings.rs b/cranelift/codegen/meta/src/shared/settings.rs index b9fa89587a..58b7cd4499 100644 --- a/cranelift/codegen/meta/src/shared/settings.rs +++ b/cranelift/codegen/meta/src/shared/settings.rs @@ -3,39 +3,6 @@ use crate::cdsl::settings::{SettingGroup, SettingGroupBuilder}; pub(crate) fn define() -> SettingGroup { let mut settings = SettingGroupBuilder::new("shared"); - settings.add_enum( - "regalloc", - "Register allocator to use with the MachInst backend.", - r#" - This selects the register allocator as an option among those offered by the `regalloc.rs` - crate. Please report register allocation bugs to the maintainers of this crate whenever - possible. - - Note: this only applies to target that use the MachInst backend. As of 2020-04-17, this - means the x86_64 backend doesn't use this yet. 
- - Possible values: - - - `backtracking` is a greedy, backtracking register allocator as implemented in - Spidermonkey's optimizing tier IonMonkey. It may take more time to allocate registers, but - it should generate better code in general, resulting in better throughput of generated - code. - - `backtracking_checked` is the backtracking allocator with additional self checks that may - take some time to run, and thus these checks are disabled by default. - - `experimental_linear_scan` is an experimental linear scan allocator. It may take less - time to allocate registers, but generated code's quality may be inferior. As of - 2020-04-17, it is still experimental and it should not be used in production settings. - - `experimental_linear_scan_checked` is the linear scan allocator with additional self - checks that may take some time to run, and thus these checks are disabled by default. - "#, - vec![ - "backtracking", - "backtracking_checked", - "experimental_linear_scan", - "experimental_linear_scan_checked", - ], - ); - settings.add_enum( "opt_level", "Optimization level for generated code.", diff --git a/cranelift/codegen/src/isa/aarch64/abi.rs b/cranelift/codegen/src/isa/aarch64/abi.rs index 3fbfc83564..bc75ce85f3 100644 --- a/cranelift/codegen/src/isa/aarch64/abi.rs +++ b/cranelift/codegen/src/isa/aarch64/abi.rs @@ -14,7 +14,7 @@ use crate::settings; use crate::{CodegenError, CodegenResult}; use alloc::boxed::Box; use alloc::vec::Vec; -use regalloc::{RealReg, Reg, RegClass, Set, Writable}; +use regalloc2::VReg; use smallvec::{smallvec, SmallVec}; // We use a generic implementation that factors out AArch64 and x64 ABI commonalities, because @@ -80,7 +80,7 @@ fn try_fill_baldrdash_reg(call_conv: isa::CallConv, param: &ir::AbiParam) -> Opt &ir::ArgumentPurpose::VMContext => { // This is SpiderMonkey's `WasmTlsReg`. Some(ABIArg::reg( - xreg(BALDRDASH_TLS_REG).to_real_reg(), + xreg(BALDRDASH_TLS_REG).to_real_reg().unwrap(), ir::types::I64, param.extension, param.purpose, @@ -89,7 +89,7 @@ fn try_fill_baldrdash_reg(call_conv: isa::CallConv, param: &ir::AbiParam) -> Opt &ir::ArgumentPurpose::SignatureId => { // This is SpiderMonkey's `WasmTableCallSigReg`. 
Some(ABIArg::reg( - xreg(BALDRDASH_SIG_REG).to_real_reg(), + xreg(BALDRDASH_SIG_REG).to_real_reg().unwrap(), ir::types::I64, param.extension, param.purpose, @@ -268,7 +268,7 @@ impl ABIMachineSpec for AArch64MachineDeps { let (rcs, reg_types) = Inst::rc_for_type(param.value_type)?; if let Some(param) = try_fill_baldrdash_reg(call_conv, param) { - assert!(rcs[0] == RegClass::I64); + assert!(rcs[0] == RegClass::Int); ret.push(param); continue; } @@ -313,7 +313,7 @@ impl ABIMachineSpec for AArch64MachineDeps { "Unable to handle multi reg params with more than 2 regs" ); assert!( - rcs == &[RegClass::I64, RegClass::I64], + rcs == &[RegClass::Int, RegClass::Int], "Unable to handle non i64 regs" ); @@ -335,12 +335,12 @@ impl ABIMachineSpec for AArch64MachineDeps { ret.push(ABIArg::Slots { slots: vec![ ABIArgSlot::Reg { - reg: lower_reg.to_real_reg(), + reg: lower_reg.to_real_reg().unwrap(), ty: param.value_type, extension: param.extension, }, ABIArgSlot::Reg { - reg: upper_reg.to_real_reg(), + reg: upper_reg.to_real_reg().unwrap(), ty: param.value_type, extension: param.extension, }, @@ -356,19 +356,17 @@ impl ABIMachineSpec for AArch64MachineDeps { // Single Register parameters let rc = rcs[0]; let next_reg = match rc { - RegClass::I64 => &mut next_xreg, - RegClass::V128 => &mut next_vreg, - _ => panic!("Invalid register class: {:?}", rc), + RegClass::Int => &mut next_xreg, + RegClass::Float => &mut next_vreg, }; if *next_reg < max_per_class_reg_vals && remaining_reg_vals > 0 { let reg = match rc { - RegClass::I64 => xreg(*next_reg), - RegClass::V128 => vreg(*next_reg), - _ => unreachable!(), + RegClass::Int => xreg(*next_reg), + RegClass::Float => vreg(*next_reg), }; ret.push(ABIArg::reg( - reg.to_real_reg(), + reg.to_real_reg().unwrap(), param.value_type, param.extension, param.purpose, @@ -435,7 +433,7 @@ impl ABIMachineSpec for AArch64MachineDeps { debug_assert!(args_or_rets == ArgsOrRets::Args); if next_xreg < max_per_class_reg_vals && remaining_reg_vals > 0 { ret.push(ABIArg::reg( - xreg(next_xreg).to_real_reg(), + xreg(next_xreg).to_real_reg().unwrap(), I64, ir::ArgumentExtension::None, ir::ArgumentPurpose::Normal, @@ -505,8 +503,8 @@ impl ABIMachineSpec for AArch64MachineDeps { } } - fn gen_ret() -> Inst { - Inst::Ret + fn gen_ret(rets: Vec<Reg>) -> Inst { + Inst::Ret { rets } } fn gen_add_imm(into_reg: Writable<Reg>, from_reg: Reg, imm: u32) -> SmallInstVec<Inst> { @@ -708,7 +706,7 @@ impl ABIMachineSpec for AArch64MachineDeps { call_conv: isa::CallConv, setup_frame: bool, flags: &settings::Flags, - clobbered_callee_saves: &Vec<Writable<RealReg>>, + clobbered_callee_saves: &[Writable<RealReg>], fixed_frame_storage_size: u32, _outgoing_args_size: u32, ) -> (u64, SmallVec<[Inst; 16]>) { @@ -716,10 +714,9 @@ impl ABIMachineSpec for AArch64MachineDeps { let mut clobbered_vec = vec![]; for &reg in clobbered_callee_saves.iter() { - match reg.to_reg().get_class() { - RegClass::I64 => clobbered_int.push(reg), - RegClass::V128 => clobbered_vec.push(reg), - class => panic!("Unexpected RegClass: {:?}", class), + match reg.to_reg().class() { + RegClass::Int => clobbered_int.push(reg), + RegClass::Float => clobbered_vec.push(reg), } } @@ -758,9 +755,9 @@ impl ABIMachineSpec for AArch64MachineDeps { let iter = clobbered_int.chunks_exact(2); if let [rd] = iter.remainder() { - let rd = rd.to_reg().to_reg(); + let rd: Reg = rd.to_reg().into(); - debug_assert_eq!(rd.get_class(), RegClass::I64); + debug_assert_eq!(rd.class(), RegClass::Int); // str rd, [sp, #-16]!
insts.push(Inst::Store64 { rd, @@ -776,7 +773,7 @@ impl ABIMachineSpec for AArch64MachineDeps { insts.push(Inst::Unwind { inst: UnwindInst::SaveReg { clobber_offset, - reg: rd.to_real_reg(), + reg: rd.to_real_reg().unwrap(), }, }); } @@ -785,12 +782,12 @@ impl ABIMachineSpec for AArch64MachineDeps { let mut iter = iter.rev(); while let Some([rt, rt2]) = iter.next() { - // .to_reg().to_reg(): Writable<RealReg> --> RealReg --> Reg - let rt = rt.to_reg().to_reg(); - let rt2 = rt2.to_reg().to_reg(); + // .to_reg().into(): Writable<RealReg> --> RealReg --> Reg + let rt: Reg = rt.to_reg().into(); + let rt2: Reg = rt2.to_reg().into(); - debug_assert!(rt.get_class() == RegClass::I64); - debug_assert!(rt2.get_class() == RegClass::I64); + debug_assert!(rt.class() == RegClass::Int); + debug_assert!(rt2.class() == RegClass::Int); // stp rt, rt2, [sp, #-16]! insts.push(Inst::StoreP64 { @@ -808,13 +805,13 @@ impl ABIMachineSpec for AArch64MachineDeps { insts.push(Inst::Unwind { inst: UnwindInst::SaveReg { clobber_offset, - reg: rt.to_real_reg(), + reg: rt.to_real_reg().unwrap(), }, }); insts.push(Inst::Unwind { inst: UnwindInst::SaveReg { clobber_offset: clobber_offset + (clobber_offset_change / 2) as u32, - reg: rt2.to_real_reg(), + reg: rt2.to_real_reg().unwrap(), }, }); } @@ -844,9 +841,9 @@ impl ABIMachineSpec for AArch64MachineDeps { let iter = clobbered_vec.chunks_exact(2); if let [rd] = iter.remainder() { - let rd = rd.to_reg().to_reg(); + let rd: Reg = rd.to_reg().into(); - debug_assert_eq!(rd.get_class(), RegClass::V128); + debug_assert_eq!(rd.class(), RegClass::Float); insts.push(store_vec_reg(rd)); if flags.unwind_info() { @@ -854,7 +851,7 @@ impl ABIMachineSpec for AArch64MachineDeps { insts.push(Inst::Unwind { inst: UnwindInst::SaveReg { clobber_offset, - reg: rd.to_real_reg(), + reg: rd.to_real_reg().unwrap(), }, }); } @@ -896,11 +893,11 @@ impl ABIMachineSpec for AArch64MachineDeps { let mut iter = iter.rev(); while let Some([rt, rt2]) = iter.next() { - let rt = rt.to_reg().to_reg(); - let rt2 = rt2.to_reg().to_reg(); + let rt: Reg = rt.to_reg().into(); + let rt2: Reg = rt2.to_reg().into(); - debug_assert_eq!(rt.get_class(), RegClass::V128); - debug_assert_eq!(rt2.get_class(), RegClass::V128); + debug_assert_eq!(rt.class(), RegClass::Float); + debug_assert_eq!(rt2.class(), RegClass::Float); let (inst, clobber_offset_change) = store_vec_reg_pair(rt, rt2); @@ -911,13 +908,13 @@ impl ABIMachineSpec for AArch64MachineDeps { insts.push(Inst::Unwind { inst: UnwindInst::SaveReg { clobber_offset, - reg: rt.to_real_reg(), + reg: rt.to_real_reg().unwrap(), }, }); insts.push(Inst::Unwind { inst: UnwindInst::SaveReg { clobber_offset: clobber_offset + clobber_offset_change / 2, - reg: rt2.to_real_reg(), + reg: rt2.to_real_reg().unwrap(), }, }); } @@ -934,7 +931,7 @@ impl ABIMachineSpec for AArch64MachineDeps { fn gen_clobber_restore( call_conv: isa::CallConv, flags: &settings::Flags, - clobbers: &Set<Writable<RealReg>>, + clobbers: &[Writable<RealReg>], fixed_frame_storage_size: u32, _outgoing_args_size: u32, ) -> SmallVec<[Inst; 16]> { @@ -994,31 +991,31 @@ impl ABIMachineSpec for AArch64MachineDeps { let mut iter = clobbered_vec.chunks_exact(2); while let Some([rt, rt2]) = iter.next() { - let rt = rt.map(|r| r.to_reg()); - let rt2 = rt2.map(|r| r.to_reg()); + let rt: Writable<Reg> = rt.map(|r| r.into()); + let rt2: Writable<Reg> = rt2.map(|r| r.into()); - debug_assert_eq!(rt.to_reg().get_class(), RegClass::V128); - debug_assert_eq!(rt2.to_reg().get_class(), RegClass::V128); + debug_assert_eq!(rt.to_reg().class(), RegClass::Float); +
debug_assert_eq!(rt2.to_reg().class(), RegClass::Float); insts.push(load_vec_reg_pair(rt, rt2)); } debug_assert!(iter.remainder().len() <= 1); if let [rd] = iter.remainder() { - let rd = rd.map(|r| r.to_reg()); + let rd: Writable<Reg> = rd.map(|r| r.into()); - debug_assert_eq!(rd.to_reg().get_class(), RegClass::V128); + debug_assert_eq!(rd.to_reg().class(), RegClass::Float); insts.push(load_vec_reg(rd)); } let mut iter = clobbered_int.chunks_exact(2); while let Some([rt, rt2]) = iter.next() { - let rt = rt.map(|r| r.to_reg()); - let rt2 = rt2.map(|r| r.to_reg()); + let rt: Writable<Reg> = rt.map(|r| r.into()); + let rt2: Writable<Reg> = rt2.map(|r| r.into()); - debug_assert_eq!(rt.to_reg().get_class(), RegClass::I64); - debug_assert_eq!(rt2.to_reg().get_class(), RegClass::I64); + debug_assert_eq!(rt.to_reg().class(), RegClass::Int); + debug_assert_eq!(rt2.to_reg().class(), RegClass::Int); // ldp rt, rt2, [sp], #16 insts.push(Inst::LoadP64 { rt, @@ -1034,9 +1031,9 @@ impl ABIMachineSpec for AArch64MachineDeps { debug_assert!(iter.remainder().len() <= 1); if let [rd] = iter.remainder() { - let rd = rd.map(|r| r.to_reg()); + let rd: Writable<Reg> = rd.map(|r| r.into()); - debug_assert_eq!(rd.to_reg().get_class(), RegClass::I64); + debug_assert_eq!(rd.to_reg().class(), RegClass::Int); // ldr rd, [sp], #16 insts.push(Inst::ULoad64 { rd, @@ -1069,58 +1066,46 @@ impl ABIMachineSpec for AArch64MachineDeps { tmp: Writable<Reg>, callee_conv: isa::CallConv, caller_conv: isa::CallConv, - ) -> SmallVec<[(InstIsSafepoint, Inst); 2]> { + ) -> SmallVec<[Inst; 2]> { let mut insts = SmallVec::new(); match &dest { - &CallDest::ExtName(ref name, RelocDistance::Near) => insts.push(( - InstIsSafepoint::Yes, - Inst::Call { - info: Box::new(CallInfo { - dest: name.clone(), - uses, - defs, - opcode, - caller_callconv: caller_conv, - callee_callconv: callee_conv, - }), - }, - )), + &CallDest::ExtName(ref name, RelocDistance::Near) => insts.push(Inst::Call { + info: Box::new(CallInfo { + dest: name.clone(), + uses, + defs, + opcode, + caller_callconv: caller_conv, + callee_callconv: callee_conv, + }), + }), &CallDest::ExtName(ref name, RelocDistance::Far) => { - insts.push(( - InstIsSafepoint::No, - Inst::LoadExtName { - rd: tmp, - name: Box::new(name.clone()), - offset: 0, - }, - )); - insts.push(( - InstIsSafepoint::Yes, - Inst::CallInd { - info: Box::new(CallIndInfo { - rn: tmp.to_reg(), - uses, - defs, - opcode, - caller_callconv: caller_conv, - callee_callconv: callee_conv, - }), - }, - )); - } - &CallDest::Reg(reg) => insts.push(( - InstIsSafepoint::Yes, - Inst::CallInd { + insts.push(Inst::LoadExtName { + rd: tmp, + name: Box::new(name.clone()), + offset: 0, + }); + insts.push(Inst::CallInd { info: Box::new(CallIndInfo { - rn: *reg, + rn: tmp.to_reg(), uses, defs, opcode, caller_callconv: caller_conv, callee_callconv: callee_conv, }), - }, - )), + }); + } + &CallDest::Reg(reg) => insts.push(Inst::CallInd { + info: Box::new(CallIndInfo { + rn: *reg, + uses, + defs, + opcode, + caller_callconv: caller_conv, + callee_callconv: callee_conv, + }), + }), } insts @@ -1157,9 +1142,8 @@ impl ABIMachineSpec for AArch64MachineDeps { fn get_number_of_spillslots_for_value(rc: RegClass) -> u32 { // We allocate in terms of 8-byte slots.
match rc { - RegClass::I64 => 1, - RegClass::V128 => 2, - _ => panic!("Unexpected register class!"), + RegClass::Int => 1, + RegClass::Float => 2, } } @@ -1177,13 +1161,13 @@ impl ABIMachineSpec for AArch64MachineDeps { let mut caller_saved = Vec::new(); for i in 0..29 { let x = writable_xreg(i); - if is_reg_clobbered_by_call(call_conv_of_callee, x.to_reg().to_real_reg()) { + if is_reg_clobbered_by_call(call_conv_of_callee, x.to_reg().to_real_reg().unwrap()) { caller_saved.push(x); } } for i in 0..32 { let v = writable_vreg(i); - if is_reg_clobbered_by_call(call_conv_of_callee, v.to_reg().to_real_reg()) { + if is_reg_clobbered_by_call(call_conv_of_callee, v.to_reg().to_real_reg().unwrap()) { caller_saved.push(v); } } @@ -1205,7 +1189,7 @@ impl ABIMachineSpec for AArch64MachineDeps { fn get_clobbered_callee_saves( call_conv: isa::CallConv, - regs: &Set<Writable<RealReg>>, + regs: &[Writable<RealReg>], ) -> Vec<Writable<RealReg>> { let mut regs: Vec<Writable<RealReg>> = regs .iter() @@ -1215,7 +1199,7 @@ impl ABIMachineSpec for AArch64MachineDeps { // Sort registers for deterministic code output. We can do an unstable // sort because the registers will be unique (there are no dups). - regs.sort_unstable_by_key(|r| r.to_reg().get_index()); + regs.sort_unstable_by_key(|r| VReg::from(r.to_reg()).vreg()); regs } @@ -1247,29 +1231,27 @@ fn legal_type_for_machine(ty: Type) -> bool { /// callee-save? fn is_reg_saved_in_prologue(call_conv: isa::CallConv, r: RealReg) -> bool { if call_conv.extends_baldrdash() { - match r.get_class() { - RegClass::I64 => { - let enc = r.get_hw_encoding(); - return BALDRDASH_JIT_CALLEE_SAVED_GPR[enc]; + match r.class() { + RegClass::Int => { + let enc = r.hw_enc() & 31; + return BALDRDASH_JIT_CALLEE_SAVED_GPR[enc as usize]; } - RegClass::V128 => { - let enc = r.get_hw_encoding(); - return BALDRDASH_JIT_CALLEE_SAVED_FPU[enc]; + RegClass::Float => { + let enc = r.hw_enc() & 31; + return BALDRDASH_JIT_CALLEE_SAVED_FPU[enc as usize]; } - _ => unimplemented!("baldrdash callee saved on non-i64 reg classes"), }; } - match r.get_class() { - RegClass::I64 => { + match r.class() { + RegClass::Int => { // x19 - x28 inclusive are callee-saves. - r.get_hw_encoding() >= 19 && r.get_hw_encoding() <= 28 + r.hw_enc() >= 19 && r.hw_enc() <= 28 } - RegClass::V128 => { + RegClass::Float => { // v8 - v15 inclusive are callee-saves. - r.get_hw_encoding() >= 8 && r.get_hw_encoding() <= 15 + r.hw_enc() >= 8 && r.hw_enc() <= 15 } - _ => panic!("Unexpected RegClass") } } @@ -1278,53 +1260,51 @@ fn is_reg_saved_in_prologue(call_conv: isa::CallConv, r: RealReg) -> bool { /// written by the function's body. fn get_regs_restored_in_epilogue( call_conv: isa::CallConv, - regs: &Set<Writable<RealReg>>, + regs: &[Writable<RealReg>], ) -> (Vec<Writable<RealReg>>, Vec<Writable<RealReg>>) { let mut int_saves = vec![]; let mut vec_saves = vec![]; - for &reg in regs.iter() { + for &reg in regs { if is_reg_saved_in_prologue(call_conv, reg.to_reg()) { - match reg.to_reg().get_class() { - RegClass::I64 => int_saves.push(reg), - RegClass::V128 => vec_saves.push(reg), - _ => panic!("Unexpected RegClass"), + match reg.to_reg().class() { + RegClass::Int => int_saves.push(reg), + RegClass::Float => vec_saves.push(reg), } } } // Sort registers for deterministic code output. We can do an unstable sort because the // registers will be unique (there are no dups).
- int_saves.sort_unstable_by_key(|r| r.to_reg().get_index()); - vec_saves.sort_unstable_by_key(|r| r.to_reg().get_index()); + int_saves.sort_unstable_by_key(|r| VReg::from(r.to_reg()).vreg()); + vec_saves.sort_unstable_by_key(|r| VReg::from(r.to_reg()).vreg()); (int_saves, vec_saves) } fn is_reg_clobbered_by_call(call_conv_of_callee: isa::CallConv, r: RealReg) -> bool { if call_conv_of_callee.extends_baldrdash() { - match r.get_class() { - RegClass::I64 => { - let enc = r.get_hw_encoding(); - if !BALDRDASH_JIT_CALLEE_SAVED_GPR[enc] { + match r.class() { + RegClass::Int => { + let enc = r.hw_enc() & 31; + if !BALDRDASH_JIT_CALLEE_SAVED_GPR[enc as usize] { return true; } // Otherwise, fall through to preserve native's ABI caller-saved. } - RegClass::V128 => { - let enc = r.get_hw_encoding(); - if !BALDRDASH_JIT_CALLEE_SAVED_FPU[enc] { + RegClass::Float => { + let enc = r.hw_enc() & 31; + if !BALDRDASH_JIT_CALLEE_SAVED_FPU[enc as usize] { return true; } // Otherwise, fall through to preserve native's ABI caller-saved. } - _ => unimplemented!("baldrdash callee saved on non-i64 reg classes"), }; } - match r.get_class() { - RegClass::I64 => { + match r.class() { + RegClass::Int => { // x0 - x17 inclusive are caller-saves. - r.get_hw_encoding() <= 17 + r.hw_enc() <= 17 } - RegClass::V128 => { + RegClass::Float => { // v0 - v7 inclusive and v16 - v31 inclusive are caller-saves. The // upper 64 bits of v8 - v15 inclusive are also caller-saves. // However, because we cannot currently represent partial registers @@ -1341,6 +1321,5 @@ fn is_reg_clobbered_by_call(call_conv_of_callee: isa::CallConv, r: RealReg) -> b // include them as defs here. true } - _ => panic!("Unexpected RegClass"), } } diff --git a/cranelift/codegen/src/isa/aarch64/inst.isle b/cranelift/codegen/src/isa/aarch64/inst.isle index 734c268bb0..7392d02f77 100644 --- a/cranelift/codegen/src/isa/aarch64/inst.isle +++ b/cranelift/codegen/src/isa/aarch64/inst.isle @@ -1,795 +1,794 @@ ;; Instruction formats. (type MInst - (enum - ;; A no-op of zero size. - (Nop0) + (enum + ;; A no-op of zero size. + (Nop0) - ;; A no-op that is one instruction large. - (Nop4) + ;; A no-op that is one instruction large. + (Nop4) - ;; An ALU operation with two register sources and a register destination. - (AluRRR - (alu_op ALUOp) - (size OperandSize) - (rd WritableReg) - (rn Reg) - (rm Reg)) + ;; An ALU operation with two register sources and a register destination. + (AluRRR + (alu_op ALUOp) + (size OperandSize) + (rd WritableReg) + (rn Reg) + (rm Reg)) - ;; An ALU operation with three register sources and a register destination. - (AluRRRR - (alu_op ALUOp3) - (rd WritableReg) - (rn Reg) - (rm Reg) - (ra Reg)) + ;; An ALU operation with three register sources and a register destination. + (AluRRRR + (alu_op ALUOp3) + (rd WritableReg) + (rn Reg) + (rm Reg) + (ra Reg)) - ;; An ALU operation with a register source and an immediate-12 source, and a register - ;; destination. - (AluRRImm12 - (alu_op ALUOp) - (size OperandSize) - (rd WritableReg) - (rn Reg) - (imm12 Imm12)) + ;; An ALU operation with a register source and an immediate-12 source, and a register + ;; destination. + (AluRRImm12 + (alu_op ALUOp) + (size OperandSize) + (rd WritableReg) + (rn Reg) + (imm12 Imm12)) - ;; An ALU operation with a register source and an immediate-logic source, and a register destination. 
- (AluRRImmLogic - (alu_op ALUOp) - (size OperandSize) - (rd WritableReg) - (rn Reg) - (imml ImmLogic)) + ;; An ALU operation with a register source and an immediate-logic source, and a register destination. + (AluRRImmLogic + (alu_op ALUOp) + (size OperandSize) + (rd WritableReg) + (rn Reg) + (imml ImmLogic)) - ;; An ALU operation with a register source and an immediate-shiftamt source, and a register destination. - (AluRRImmShift - (alu_op ALUOp) - (size OperandSize) - (rd WritableReg) - (rn Reg) - (immshift ImmShift)) + ;; An ALU operation with a register source and an immediate-shiftamt source, and a register destination. + (AluRRImmShift + (alu_op ALUOp) + (size OperandSize) + (rd WritableReg) + (rn Reg) + (immshift ImmShift)) - ;; An ALU operation with two register sources, one of which can be shifted, and a register - ;; destination. - (AluRRRShift - (alu_op ALUOp) - (size OperandSize) - (rd WritableReg) - (rn Reg) - (rm Reg) - (shiftop ShiftOpAndAmt)) + ;; An ALU operation with two register sources, one of which can be shifted, and a register + ;; destination. + (AluRRRShift + (alu_op ALUOp) + (size OperandSize) + (rd WritableReg) + (rn Reg) + (rm Reg) + (shiftop ShiftOpAndAmt)) - ;; An ALU operation with two register sources, one of which can be {zero,sign}-extended and - ;; shifted, and a register destination. - (AluRRRExtend - (alu_op ALUOp) - (size OperandSize) - (rd WritableReg) - (rn Reg) - (rm Reg) - (extendop ExtendOp)) + ;; An ALU operation with two register sources, one of which can be {zero,sign}-extended and + ;; shifted, and a register destination. + (AluRRRExtend + (alu_op ALUOp) + (size OperandSize) + (rd WritableReg) + (rn Reg) + (rm Reg) + (extendop ExtendOp)) - ;; A bit op instruction with a single register source. - (BitRR - (op BitOp) - (size OperandSize) - (rd WritableReg) - (rn Reg)) + ;; A bit op instruction with a single register source. + (BitRR + (op BitOp) + (size OperandSize) + (rd WritableReg) + (rn Reg)) - ;; An unsigned (zero-extending) 8-bit load. - (ULoad8 - (rd WritableReg) - (mem AMode) - (flags MemFlags)) + ;; An unsigned (zero-extending) 8-bit load. + (ULoad8 + (rd WritableReg) + (mem AMode) + (flags MemFlags)) - ;; A signed (sign-extending) 8-bit load. - (SLoad8 - (rd WritableReg) - (mem AMode) - (flags MemFlags)) + ;; A signed (sign-extending) 8-bit load. + (SLoad8 + (rd WritableReg) + (mem AMode) + (flags MemFlags)) - ;; An unsigned (zero-extending) 16-bit load. - (ULoad16 - (rd WritableReg) - (mem AMode) - (flags MemFlags)) + ;; An unsigned (zero-extending) 16-bit load. + (ULoad16 + (rd WritableReg) + (mem AMode) + (flags MemFlags)) - ;; A signed (sign-extending) 16-bit load. - (SLoad16 - (rd WritableReg) - (mem AMode) - (flags MemFlags)) + ;; A signed (sign-extending) 16-bit load. + (SLoad16 + (rd WritableReg) + (mem AMode) + (flags MemFlags)) - ;; An unsigned (zero-extending) 32-bit load. - (ULoad32 - (rd WritableReg) - (mem AMode) - (flags MemFlags)) + ;; An unsigned (zero-extending) 32-bit load. + (ULoad32 + (rd WritableReg) + (mem AMode) + (flags MemFlags)) - ;; A signed (sign-extending) 32-bit load. - (SLoad32 - (rd WritableReg) - (mem AMode) - (flags MemFlags)) + ;; A signed (sign-extending) 32-bit load. + (SLoad32 + (rd WritableReg) + (mem AMode) + (flags MemFlags)) - ;; A 64-bit load. - (ULoad64 - (rd WritableReg) - (mem AMode) - (flags MemFlags)) + ;; A 64-bit load. + (ULoad64 + (rd WritableReg) + (mem AMode) + (flags MemFlags)) - ;; An 8-bit store. - (Store8 - (rd Reg) - (mem AMode) - (flags MemFlags)) + ;; An 8-bit store. 
+ (Store8 + (rd Reg) + (mem AMode) + (flags MemFlags)) - ;; A 16-bit store. - (Store16 - (rd Reg) - (mem AMode) - (flags MemFlags)) + ;; A 16-bit store. + (Store16 + (rd Reg) + (mem AMode) + (flags MemFlags)) - ;; A 32-bit store. - (Store32 - (rd Reg) - (mem AMode) - (flags MemFlags)) + ;; A 32-bit store. + (Store32 + (rd Reg) + (mem AMode) + (flags MemFlags)) - ;; A 64-bit store. - (Store64 - (rd Reg) - (mem AMode) - (flags MemFlags)) + ;; A 64-bit store. + (Store64 + (rd Reg) + (mem AMode) + (flags MemFlags)) - ;; A store of a pair of registers. - (StoreP64 - (rt Reg) - (rt2 Reg) - (mem PairAMode) - (flags MemFlags)) + ;; A store of a pair of registers. + (StoreP64 + (rt Reg) + (rt2 Reg) + (mem PairAMode) + (flags MemFlags)) - ;; A load of a pair of registers. - (LoadP64 - (rt WritableReg) - (rt2 WritableReg) - (mem PairAMode) - (flags MemFlags)) + ;; A load of a pair of registers. + (LoadP64 + (rt WritableReg) + (rt2 WritableReg) + (mem PairAMode) + (flags MemFlags)) - ;; A MOV instruction. These are encoded as ORR's (AluRRR form) but we - ;; keep them separate at the `Inst` level for better pretty-printing - ;; and faster `is_move()` logic. - (Mov64 - (rd WritableReg) - (rm Reg)) + ;; A MOV instruction. These are encoded as ORR's (AluRRR form) but we + ;; keep them separate at the `Inst` level for better pretty-printing + ;; and faster `is_move()` logic. + (Mov64 + (rd WritableReg) + (rm Reg)) - ;; A 32-bit MOV. Zeroes the top 32 bits of the destination. This is - ;; effectively an alias for an unsigned 32-to-64-bit extension. - (Mov32 - (rd WritableReg) - (rm Reg)) + ;; A 32-bit MOV. Zeroes the top 32 bits of the destination. This is + ;; effectively an alias for an unsigned 32-to-64-bit extension. + (Mov32 + (rd WritableReg) + (rm Reg)) - ;; A MOVZ with a 16-bit immediate. - (MovZ - (rd WritableReg) - (imm MoveWideConst) - (size OperandSize)) + ;; A MOVZ with a 16-bit immediate. + (MovZ + (rd WritableReg) + (imm MoveWideConst) + (size OperandSize)) - ;; A MOVN with a 16-bit immediate. - (MovN - (rd WritableReg) - (imm MoveWideConst) - (size OperandSize)) + ;; A MOVN with a 16-bit immediate. + (MovN + (rd WritableReg) + (imm MoveWideConst) + (size OperandSize)) - ;; A MOVK with a 16-bit immediate. - (MovK - (rd WritableReg) - (imm MoveWideConst) - (size OperandSize)) + ;; A MOVK with a 16-bit immediate. + (MovK + (rd WritableReg) + (imm MoveWideConst) + (size OperandSize)) - ;; A sign- or zero-extend operation. - (Extend - (rd WritableReg) - (rn Reg) - (signed bool) - (from_bits u8) - (to_bits u8)) + ;; A sign- or zero-extend operation. + (Extend + (rd WritableReg) + (rn Reg) + (signed bool) + (from_bits u8) + (to_bits u8)) - ;; A conditional-select operation. - (CSel - (rd WritableReg) - (cond Cond) - (rn Reg) - (rm Reg)) + ;; A conditional-select operation. + (CSel + (rd WritableReg) + (cond Cond) + (rn Reg) + (rm Reg)) - ;; A conditional-set operation. - (CSet - (rd WritableReg) - (cond Cond)) + ;; A conditional-set operation. + (CSet + (rd WritableReg) + (cond Cond)) - ;; A conditional-set-mask operation. - (CSetm - (rd WritableReg) - (cond Cond)) + ;; A conditional-set-mask operation. + (CSetm + (rd WritableReg) + (cond Cond)) - ;; A conditional comparison with an immediate. - (CCmpImm - (size OperandSize) - (rn Reg) - (imm UImm5) - (nzcv NZCV) - (cond Cond)) + ;; A conditional comparison with an immediate. 
+ (CCmpImm + (size OperandSize) + (rn Reg) + (imm UImm5) + (nzcv NZCV) + (cond Cond)) - ;; A synthetic insn, which is a load-linked store-conditional loop, that has the overall - ;; effect of atomically modifying a memory location in a particular way. Because we have - ;; no way to explain to the regalloc about earlyclobber registers, this instruction has - ;; completely fixed operand registers, and we rely on the RA's coalescing to remove copies - ;; in the surrounding code to the extent it can. The sequence is both preceded and - ;; followed by a fence which is at least as comprehensive as that of the `Fence` - ;; instruction below. This instruction is sequentially consistent. The operand - ;; conventions are: - ;; - ;; x25 (rd) address - ;; x26 (rd) second operand for `op` - ;; x27 (wr) old value - ;; x24 (wr) scratch reg; value afterwards has no meaning - ;; x28 (wr) scratch reg; value afterwards has no meaning - (AtomicRMWLoop - (ty Type) ;; I8, I16, I32 or I64 - (op AtomicRmwOp)) - - ;; An atomic read-modify-write operation. These instructions require the - ;; Large System Extension (LSE) ISA support (FEAT_LSE). The instructions have - ;; acquire-release semantics. - (AtomicRMW - (op AtomicRMWOp) - (rs Reg) - (rt WritableReg) - (rn Reg) - (ty Type)) - - ;; An atomic compare-and-swap operation. This instruction is sequentially consistent. - (AtomicCAS - (rs WritableReg) - (rt Reg) - (rn Reg) - (ty Type)) - - ;; Similar to AtomicRMWLoop, a compare-and-swap operation implemented using a load-linked - ;; store-conditional loop. - ;; This instruction is sequentially consistent. - ;; Note that the operand conventions, although very similar to AtomicRMWLoop, are different: - ;; - ;; x25 (rd) address - ;; x26 (rd) expected value - ;; x28 (rd) replacement value - ;; x27 (wr) old value - ;; x24 (wr) scratch reg; value afterwards has no meaning - (AtomicCASLoop + ;; A synthetic insn, which is a load-linked store-conditional loop, that has the overall + ;; effect of atomically modifying a memory location in a particular way. Because we have + ;; no way to explain to the regalloc about earlyclobber registers, this instruction has + ;; completely fixed operand registers, and we rely on the RA's coalescing to remove copies + ;; in the surrounding code to the extent it can. The sequence is both preceded and + ;; followed by a fence which is at least as comprehensive as that of the `Fence` + ;; instruction below. This instruction is sequentially consistent. The operand + ;; conventions are: + ;; + ;; x25 (rd) address + ;; x26 (rd) second operand for `op` + ;; x27 (wr) old value + ;; x24 (wr) scratch reg; value afterwards has no meaning + ;; x28 (wr) scratch reg; value afterwards has no meaning + (AtomicRMWLoop (ty Type) ;; I8, I16, I32 or I64 - ) - - ;; Read `access_ty` bits from address `rt`, either 8, 16, 32 or 64-bits, and put - ;; it in `rn`, optionally zero-extending to fill a word or double word result. - ;; This instruction is sequentially consistent. - (LoadAcquire - (access_ty Type) ;; I8, I16, I32 or I64 - (rt WritableReg) - (rn Reg)) - - ;; Write the lowest `ty` bits of `rt` to address `rn`. - ;; This instruction is sequentially consistent. - (StoreRelease - (access_ty Type) ;; I8, I16, I32 or I64 - (rt Reg) - (rn Reg)) - - ;; A memory fence. This must provide ordering to ensure that, at a minimum, neither loads - ;; nor stores may move forwards or backwards across the fence. Currently emitted as "dmb - ;; ish". This instruction is sequentially consistent. - (Fence) - - ;; FPU move. 
Note that this is distinct from a vector-register - ;; move; moving just 64 bits seems to be significantly faster. - (FpuMove64 - (rd WritableReg) - (rn Reg)) - - ;; Vector register move. - (FpuMove128 - (rd WritableReg) - (rn Reg)) - - ;; Move to scalar from a vector element. - (FpuMoveFromVec - (rd WritableReg) - (rn Reg) - (idx u8) - (size VectorSize)) - - ;; Zero-extend a SIMD & FP scalar to the full width of a vector register. - ;; 16-bit scalars require half-precision floating-point support (FEAT_FP16). - (FpuExtend - (rd WritableReg) - (rn Reg) - (size ScalarSize)) - - ;; 1-op FPU instruction. - (FpuRR - (fpu_op FPUOp1) - (rd WritableReg) - (rn Reg)) - - ;; 2-op FPU instruction. - (FpuRRR - (fpu_op FPUOp2) - (rd WritableReg) - (rn Reg) - (rm Reg)) - - (FpuRRI - (fpu_op FPUOpRI) - (rd WritableReg) - (rn Reg)) - - ;; 3-op FPU instruction. - (FpuRRRR - (fpu_op FPUOp3) - (rd WritableReg) - (rn Reg) - (rm Reg) - (ra Reg)) - - ;; FPU comparison, single-precision (32 bit). - (FpuCmp32 - (rn Reg) - (rm Reg)) - - ;; FPU comparison, double-precision (64 bit). - (FpuCmp64 - (rn Reg) - (rm Reg)) - - ;; Floating-point load, single-precision (32 bit). - (FpuLoad32 - (rd WritableReg) - (mem AMode) - (flags MemFlags)) - - ;; Floating-point store, single-precision (32 bit). - (FpuStore32 - (rd Reg) - (mem AMode) - (flags MemFlags)) - - ;; Floating-point load, double-precision (64 bit). - (FpuLoad64 - (rd WritableReg) - (mem AMode) - (flags MemFlags)) - - ;; Floating-point store, double-precision (64 bit). - (FpuStore64 - (rd Reg) - (mem AMode) - (flags MemFlags)) - - ;; Floating-point/vector load, 128 bit. - (FpuLoad128 - (rd WritableReg) - (mem AMode) - (flags MemFlags)) - - ;; Floating-point/vector store, 128 bit. - (FpuStore128 - (rd Reg) - (mem AMode) - (flags MemFlags)) - - ;; A load of a pair of floating-point registers, double precision (64-bit). - (FpuLoadP64 - (rt WritableReg) - (rt2 WritableReg) - (mem PairAMode) - (flags MemFlags)) - - ;; A store of a pair of floating-point registers, double precision (64-bit). - (FpuStoreP64 - (rt Reg) - (rt2 Reg) - (mem PairAMode) - (flags MemFlags)) - - ;; A load of a pair of floating-point registers, 128-bit. - (FpuLoadP128 - (rt WritableReg) - (rt2 WritableReg) - (mem PairAMode) - (flags MemFlags)) - - ;; A store of a pair of floating-point registers, 128-bit. - (FpuStoreP128 - (rt Reg) - (rt2 Reg) - (mem PairAMode) - (flags MemFlags)) - - (LoadFpuConst64 - (rd WritableReg) - (const_data u64)) - - (LoadFpuConst128 - (rd WritableReg) - (const_data u128)) - - ;; Conversion: FP -> integer. - (FpuToInt - (op FpuToIntOp) - (rd WritableReg) - (rn Reg)) - - ;; Conversion: integer -> FP. - (IntToFpu - (op IntToFpuOp) - (rd WritableReg) - (rn Reg)) - - ;; FP conditional select, 32 bit. - (FpuCSel32 - (rd WritableReg) - (rn Reg) - (rm Reg) - (cond Cond)) - - ;; FP conditional select, 64 bit. - (FpuCSel64 - (rd WritableReg) - (rn Reg) - (rm Reg) - (cond Cond)) - - ;; Round to integer. - (FpuRound - (op FpuRoundMode) - (rd WritableReg) - (rn Reg)) - - ;; Move from a GPR to a vector register. The scalar value is parked in the lowest lane - ;; of the destination, and all other lanes are zeroed out. Currently only 32- and 64-bit - ;; transactions are supported. - (MovToFpu - (rd WritableReg) - (rn Reg) - (size ScalarSize)) - - ;; Loads a floating-point immediate. - (FpuMoveFPImm - (rd WritableReg) - (imm ASIMDFPModImm) - (size ScalarSize)) - - ;; Move to a vector element from a GPR. 
- (MovToVec - (rd WritableReg) - (rn Reg) - (idx u8) - (size VectorSize)) - - ;; Unsigned move from a vector element to a GPR. - (MovFromVec - (rd WritableReg) - (rn Reg) - (idx u8) - (size VectorSize)) - - ;; Signed move from a vector element to a GPR. - (MovFromVecSigned - (rd WritableReg) - (rn Reg) - (idx u8) - (size VectorSize) - (scalar_size OperandSize)) - - ;; Duplicate general-purpose register to vector. - (VecDup - (rd WritableReg) - (rn Reg) - (size VectorSize)) - - ;; Duplicate scalar to vector. - (VecDupFromFpu - (rd WritableReg) - (rn Reg) - (size VectorSize)) - - ;; Duplicate FP immediate to vector. - (VecDupFPImm - (rd WritableReg) - (imm ASIMDFPModImm) - (size VectorSize)) - - ;; Duplicate immediate to vector. - (VecDupImm - (rd WritableReg) - (imm ASIMDMovModImm) - (invert bool) - (size VectorSize)) - - ;; Vector extend. - (VecExtend - (t VecExtendOp) - (rd WritableReg) - (rn Reg) - (high_half bool)) - - ;; Move vector element to another vector element. - (VecMovElement - (rd WritableReg) - (rn Reg) - (dest_idx u8) - (src_idx u8) - (size VectorSize)) - - ;; Vector widening operation. - (VecRRLong - (op VecRRLongOp) - (rd WritableReg) - (rn Reg) - (high_half bool)) - - ;; Vector narrowing operation. - (VecRRNarrow - (op VecRRNarrowOp) - (rd WritableReg) - (rn Reg) - (high_half bool)) - - ;; 1-operand vector instruction that operates on a pair of elements. - (VecRRPair - (op VecPairOp) - (rd WritableReg) - (rn Reg)) - - ;; 2-operand vector instruction that produces a result with twice the - ;; lane width and half the number of lanes. - (VecRRRLong - (alu_op VecRRRLongOp) - (rd WritableReg) - (rn Reg) - (rm Reg) - (high_half bool)) - - ;; 1-operand vector instruction that extends elements of the input - ;; register and operates on a pair of elements. The output lane width - ;; is double that of the input. - (VecRRPairLong - (op VecRRPairLongOp) - (rd WritableReg) - (rn Reg)) - - ;; A vector ALU op. - (VecRRR - (alu_op VecALUOp) - (rd WritableReg) - (rn Reg) - (rm Reg) - (size VectorSize)) - - ;; Vector two register miscellaneous instruction. - (VecMisc - (op VecMisc2) - (rd WritableReg) - (rn Reg) - (size VectorSize)) - - ;; Vector instruction across lanes. - (VecLanes - (op VecLanesOp) - (rd WritableReg) - (rn Reg) - (size VectorSize)) - - ;; Vector shift by immediate Shift Left (immediate), Unsigned Shift Right (immediate) - ;; Signed Shift Right (immediate). These are somewhat unusual in that, for right shifts, - ;; the allowed range of `imm` values is 1 to lane-size-in-bits, inclusive. A zero - ;; right-shift cannot be encoded. Left shifts are "normal", though, having valid `imm` - ;; values from 0 to lane-size-in-bits - 1 inclusive. - (VecShiftImm - (op VecShiftImmOp) - (rd WritableReg) - (rn Reg) - (size VectorSize) - (imm u8)) - - ;; Vector extract - create a new vector, being the concatenation of the lowest `imm4` bytes - ;; of `rm` followed by the uppermost `16 - imm4` bytes of `rn`. - (VecExtract - (rd WritableReg) - (rn Reg) - (rm Reg) - (imm4 u8)) - - ;; Table vector lookup - single register table. The table consists of 8-bit elements and is - ;; stored in `rn`, while `rm` contains 8-bit element indices. `is_extension` specifies whether - ;; to emit a TBX or a TBL instruction, i.e. whether to leave the elements in the destination - ;; vector that correspond to out-of-range indices (greater than 15) unmodified or to set them - ;; to 0. - (VecTbl - (rd WritableReg) - (rn Reg) - (rm Reg) - (is_extension bool)) - - ;; Table vector lookup - two register table. 
The table consists of 8-bit elements and is - ;; stored in `rn` and `rn2`, while `rm` contains 8-bit element indices. `is_extension` - ;; specifies whether to emit a TBX or a TBL instruction, i.e. whether to leave the elements in - ;; the destination vector that correspond to out-of-range indices (greater than 31) unmodified - ;; or to set them to 0. The table registers `rn` and `rn2` must have consecutive numbers - ;; modulo 32, that is v31 and v0 (in that order) are consecutive registers. - (VecTbl2 - (rd WritableReg) - (rn Reg) - (rn2 Reg) - (rm Reg) - (is_extension bool)) - - ;; Load an element and replicate to all lanes of a vector. - (VecLoadReplicate - (rd WritableReg) - (rn Reg) - (size VectorSize)) - - ;; Vector conditional select, 128 bit. A synthetic instruction, which generates a 4-insn - ;; control-flow diamond. - (VecCSel - (rd WritableReg) - (rn Reg) - (rm Reg) - (cond Cond)) - - ;; Move to the NZCV flags (actually a `MSR NZCV, Xn` insn). - (MovToNZCV - (rn Reg)) - - ;; Move from the NZCV flags (actually a `MRS Xn, NZCV` insn). - (MovFromNZCV - (rd WritableReg)) - - ;; A machine call instruction. N.B.: this allows only a +/- 128MB offset (it uses a relocation - ;; of type `Reloc::Arm64Call`); if the destination distance is not `RelocDistance::Near`, the - ;; code should use a `LoadExtName` / `CallInd` sequence instead, allowing an arbitrary 64-bit - ;; target. - (Call - (info BoxCallInfo)) - - ;; A machine indirect-call instruction. - (CallInd - (info BoxCallIndInfo)) - - ;; ---- branches (exactly one must appear at end of BB) ---- - - ;; A machine return instruction. - (Ret) - - ;; A placeholder instruction, generating no code, meaning that a function epilogue must be - ;; inserted there. - (EpiloguePlaceholder) - - ;; An unconditional branch. - (Jump - (dest BranchTarget)) - - ;; A conditional branch. Contains two targets; at emission time, both are emitted, but - ;; the MachBuffer knows to truncate the trailing branch if fallthrough. We optimize the - ;; choice of taken/not_taken (inverting the branch polarity as needed) based on the - ;; fallthrough at the time of lowering. - (CondBr - (taken BranchTarget) - (not_taken BranchTarget) - (kind CondBrKind)) - - ;; A conditional trap: execute a `udf` if the condition is true. This is - ;; one VCode instruction because it uses embedded control flow; it is - ;; logically a single-in, single-out region, but needs to appear as one - ;; unit to the register allocator. - ;; - ;; The `CondBrKind` gives the conditional-branch condition that will - ;; *execute* the embedded `Inst`. (In the emitted code, we use the inverse - ;; of this condition in a branch that skips the trap instruction.) - (TrapIf - (kind CondBrKind) - (trap_code TrapCode)) - - ;; An indirect branch through a register, augmented with set of all - ;; possible successors. - (IndirectBr - (rn Reg) - (targets VecMachLabel)) - - ;; A "break" instruction, used for e.g. traps and debug breakpoints. - (Brk) - - ;; An instruction guaranteed to always be undefined and to trigger an illegal instruction at - ;; runtime. - (Udf - (trap_code TrapCode)) - - ;; Compute the address (using a PC-relative offset) of a memory location, using the `ADR` - ;; instruction. Note that we take a simple offset, not a `MemLabel`, here, because `Adr` is - ;; only used for now in fixed lowering sequences with hardcoded offsets. In the future we may - ;; need full `MemLabel` support. - (Adr - (rd WritableReg) - ;; Offset in range -2^20 .. 2^20. 
- (off i32)) - - ;; Raw 32-bit word, used for inline constants and jump-table entries. - (Word4 - (data u32)) - - ;; Raw 64-bit word, used for inline constants. - (Word8 - (data u64)) - - ;; Jump-table sequence, as one compound instruction (see note in lower_inst.rs for rationale). - (JTSequence - (info BoxJTSequenceInfo) - (ridx Reg) - (rtmp1 WritableReg) - (rtmp2 WritableReg)) - - ;; Load an inline symbol reference. - (LoadExtName - (rd WritableReg) - (name BoxExternalName) - (offset i64)) - - ;; Load address referenced by `mem` into `rd`. - (LoadAddr - (rd WritableReg) - (mem AMode)) - - ;; Marker, no-op in generated code: SP "virtual offset" is adjusted. This - ;; controls how AMode::NominalSPOffset args are lowered. - (VirtualSPOffsetAdj - (offset i64)) - - ;; Meta-insn, no-op in generated code: emit constant/branch veneer island - ;; at this point (with a guard jump around it) if less than the needed - ;; space is available before the next branch deadline. See the `MachBuffer` - ;; implementation in `machinst/buffer.rs` for the overall algorithm. In - ;; brief, we retain a set of "pending/unresolved label references" from - ;; branches as we scan forward through instructions to emit machine code; - ;; if we notice we're about to go out of range on an unresolved reference, - ;; we stop, emit a bunch of "veneers" (branches in a form that has a longer - ;; range, e.g. a 26-bit-offset unconditional jump), and point the original - ;; label references to those. This is an "island" because it comes in the - ;; middle of the code. - ;; - ;; This meta-instruction is a necessary part of the logic that determines - ;; where to place islands. Ordinarily, we want to place them between basic - ;; blocks, so we compute the worst-case size of each block, and emit the - ;; island before starting a block if we would exceed a deadline before the - ;; end of the block. However, some sequences (such as an inline jumptable) - ;; are variable-length and not accounted for by this logic; so these - ;; lowered sequences include an `EmitIsland` to trigger island generation - ;; where necessary. - (EmitIsland - ;; The needed space before the next deadline. - (needed_space CodeOffset)) - - ;; A call to the `ElfTlsGetAddr` libcall. Returns address of TLS symbol in x0. - (ElfTlsGetAddr - (symbol ExternalName)) - - ;; A definition of a value label. - (ValueLabelMarker - (reg Reg) - (label ValueLabel)) - - ;; An unwind pseudo-instruction. - (Unwind - (inst UnwindInst)) -)) + (op AtomicRmwOp)) + + ;; An atomic read-modify-write operation. These instructions require the + ;; Large System Extension (LSE) ISA support (FEAT_LSE). The instructions have + ;; acquire-release semantics. + (AtomicRMW + (op AtomicRMWOp) + (rs Reg) + (rt WritableReg) + (rn Reg) + (ty Type)) + + ;; An atomic compare-and-swap operation. This instruction is sequentially consistent. + (AtomicCAS + (rs WritableReg) + (rt Reg) + (rn Reg) + (ty Type)) + + ;; Similar to AtomicRMWLoop, a compare-and-swap operation implemented using a load-linked + ;; store-conditional loop. + ;; This instruction is sequentially consistent. 
+ ;; Note that the operand conventions, although very similar to AtomicRMWLoop, are different: + ;; + ;; x25 (rd) address + ;; x26 (rd) expected value + ;; x28 (rd) replacement value + ;; x27 (wr) old value + ;; x24 (wr) scratch reg; value afterwards has no meaning + (AtomicCASLoop + (ty Type) ;; I8, I16, I32 or I64 + ) + + ;; Read `access_ty` bits from address `rt`, either 8, 16, 32 or 64-bits, and put + ;; it in `rn`, optionally zero-extending to fill a word or double word result. + ;; This instruction is sequentially consistent. + (LoadAcquire + (access_ty Type) ;; I8, I16, I32 or I64 + (rt WritableReg) + (rn Reg)) + + ;; Write the lowest `ty` bits of `rt` to address `rn`. + ;; This instruction is sequentially consistent. + (StoreRelease + (access_ty Type) ;; I8, I16, I32 or I64 + (rt Reg) + (rn Reg)) + + ;; A memory fence. This must provide ordering to ensure that, at a minimum, neither loads + ;; nor stores may move forwards or backwards across the fence. Currently emitted as "dmb + ;; ish". This instruction is sequentially consistent. + (Fence) + + ;; FPU move. Note that this is distinct from a vector-register + ;; move; moving just 64 bits seems to be significantly faster. + (FpuMove64 + (rd WritableReg) + (rn Reg)) + + ;; Vector register move. + (FpuMove128 + (rd WritableReg) + (rn Reg)) + + ;; Move to scalar from a vector element. + (FpuMoveFromVec + (rd WritableReg) + (rn Reg) + (idx u8) + (size VectorSize)) + + ;; Zero-extend a SIMD & FP scalar to the full width of a vector register. + ;; 16-bit scalars require half-precision floating-point support (FEAT_FP16). + (FpuExtend + (rd WritableReg) + (rn Reg) + (size ScalarSize)) + + ;; 1-op FPU instruction. + (FpuRR + (fpu_op FPUOp1) + (rd WritableReg) + (rn Reg)) + + ;; 2-op FPU instruction. + (FpuRRR + (fpu_op FPUOp2) + (rd WritableReg) + (rn Reg) + (rm Reg)) + + (FpuRRI + (fpu_op FPUOpRI) + (rd WritableReg) + (rn Reg)) + + ;; 3-op FPU instruction. + (FpuRRRR + (fpu_op FPUOp3) + (rd WritableReg) + (rn Reg) + (rm Reg) + (ra Reg)) + + ;; FPU comparison, single-precision (32 bit). + (FpuCmp32 + (rn Reg) + (rm Reg)) + + ;; FPU comparison, double-precision (64 bit). + (FpuCmp64 + (rn Reg) + (rm Reg)) + + ;; Floating-point load, single-precision (32 bit). + (FpuLoad32 + (rd WritableReg) + (mem AMode) + (flags MemFlags)) + + ;; Floating-point store, single-precision (32 bit). + (FpuStore32 + (rd Reg) + (mem AMode) + (flags MemFlags)) + + ;; Floating-point load, double-precision (64 bit). + (FpuLoad64 + (rd WritableReg) + (mem AMode) + (flags MemFlags)) + + ;; Floating-point store, double-precision (64 bit). + (FpuStore64 + (rd Reg) + (mem AMode) + (flags MemFlags)) + + ;; Floating-point/vector load, 128 bit. + (FpuLoad128 + (rd WritableReg) + (mem AMode) + (flags MemFlags)) + + ;; Floating-point/vector store, 128 bit. + (FpuStore128 + (rd Reg) + (mem AMode) + (flags MemFlags)) + + ;; A load of a pair of floating-point registers, double precision (64-bit). + (FpuLoadP64 + (rt WritableReg) + (rt2 WritableReg) + (mem PairAMode) + (flags MemFlags)) + + ;; A store of a pair of floating-point registers, double precision (64-bit). + (FpuStoreP64 + (rt Reg) + (rt2 Reg) + (mem PairAMode) + (flags MemFlags)) + + ;; A load of a pair of floating-point registers, 128-bit. + (FpuLoadP128 + (rt WritableReg) + (rt2 WritableReg) + (mem PairAMode) + (flags MemFlags)) + + ;; A store of a pair of floating-point registers, 128-bit. 
+ (FpuStoreP128 + (rt Reg) + (rt2 Reg) + (mem PairAMode) + (flags MemFlags)) + + (LoadFpuConst64 + (rd WritableReg) + (const_data u64)) + + (LoadFpuConst128 + (rd WritableReg) + (const_data u128)) + + ;; Conversion: FP -> integer. + (FpuToInt + (op FpuToIntOp) + (rd WritableReg) + (rn Reg)) + + ;; Conversion: integer -> FP. + (IntToFpu + (op IntToFpuOp) + (rd WritableReg) + (rn Reg)) + + ;; FP conditional select, 32 bit. + (FpuCSel32 + (rd WritableReg) + (rn Reg) + (rm Reg) + (cond Cond)) + + ;; FP conditional select, 64 bit. + (FpuCSel64 + (rd WritableReg) + (rn Reg) + (rm Reg) + (cond Cond)) + + ;; Round to integer. + (FpuRound + (op FpuRoundMode) + (rd WritableReg) + (rn Reg)) + + ;; Move from a GPR to a vector register. The scalar value is parked in the lowest lane + ;; of the destination, and all other lanes are zeroed out. Currently only 32- and 64-bit + ;; transactions are supported. + (MovToFpu + (rd WritableReg) + (rn Reg) + (size ScalarSize)) + + ;; Loads a floating-point immediate. + (FpuMoveFPImm + (rd WritableReg) + (imm ASIMDFPModImm) + (size ScalarSize)) + + ;; Move to a vector element from a GPR. + (MovToVec + (rd WritableReg) + (rn Reg) + (idx u8) + (size VectorSize)) + + ;; Unsigned move from a vector element to a GPR. + (MovFromVec + (rd WritableReg) + (rn Reg) + (idx u8) + (size VectorSize)) + + ;; Signed move from a vector element to a GPR. + (MovFromVecSigned + (rd WritableReg) + (rn Reg) + (idx u8) + (size VectorSize) + (scalar_size OperandSize)) + + ;; Duplicate general-purpose register to vector. + (VecDup + (rd WritableReg) + (rn Reg) + (size VectorSize)) + + ;; Duplicate scalar to vector. + (VecDupFromFpu + (rd WritableReg) + (rn Reg) + (size VectorSize)) + + ;; Duplicate FP immediate to vector. + (VecDupFPImm + (rd WritableReg) + (imm ASIMDFPModImm) + (size VectorSize)) + + ;; Duplicate immediate to vector. + (VecDupImm + (rd WritableReg) + (imm ASIMDMovModImm) + (invert bool) + (size VectorSize)) + + ;; Vector extend. + (VecExtend + (t VecExtendOp) + (rd WritableReg) + (rn Reg) + (high_half bool)) + + ;; Move vector element to another vector element. + (VecMovElement + (rd WritableReg) + (rn Reg) + (dest_idx u8) + (src_idx u8) + (size VectorSize)) + + ;; Vector widening operation. + (VecRRLong + (op VecRRLongOp) + (rd WritableReg) + (rn Reg) + (high_half bool)) + + ;; Vector narrowing operation. + (VecRRNarrow + (op VecRRNarrowOp) + (rd WritableReg) + (rn Reg) + (high_half bool)) + + ;; 1-operand vector instruction that operates on a pair of elements. + (VecRRPair + (op VecPairOp) + (rd WritableReg) + (rn Reg)) + + ;; 2-operand vector instruction that produces a result with twice the + ;; lane width and half the number of lanes. + (VecRRRLong + (alu_op VecRRRLongOp) + (rd WritableReg) + (rn Reg) + (rm Reg) + (high_half bool)) + + ;; 1-operand vector instruction that extends elements of the input + ;; register and operates on a pair of elements. The output lane width + ;; is double that of the input. + (VecRRPairLong + (op VecRRPairLongOp) + (rd WritableReg) + (rn Reg)) + + ;; A vector ALU op. + (VecRRR + (alu_op VecALUOp) + (rd WritableReg) + (rn Reg) + (rm Reg) + (size VectorSize)) + + ;; Vector two register miscellaneous instruction. + (VecMisc + (op VecMisc2) + (rd WritableReg) + (rn Reg) + (size VectorSize)) + + ;; Vector instruction across lanes. + (VecLanes + (op VecLanesOp) + (rd WritableReg) + (rn Reg) + (size VectorSize)) + + ;; Vector shift by immediate Shift Left (immediate), Unsigned Shift Right (immediate) + ;; Signed Shift Right (immediate). 
These are somewhat unusual in that, for right shifts, + ;; the allowed range of `imm` values is 1 to lane-size-in-bits, inclusive. A zero + ;; right-shift cannot be encoded. Left shifts are "normal", though, having valid `imm` + ;; values from 0 to lane-size-in-bits - 1 inclusive. + (VecShiftImm + (op VecShiftImmOp) + (rd WritableReg) + (rn Reg) + (size VectorSize) + (imm u8)) + + ;; Vector extract - create a new vector, being the concatenation of the lowest `imm4` bytes + ;; of `rm` followed by the uppermost `16 - imm4` bytes of `rn`. + (VecExtract + (rd WritableReg) + (rn Reg) + (rm Reg) + (imm4 u8)) + + ;; Table vector lookup - single register table. The table consists of 8-bit elements and is + ;; stored in `rn`, while `rm` contains 8-bit element indices. `is_extension` specifies whether + ;; to emit a TBX or a TBL instruction, i.e. whether to leave the elements in the destination + ;; vector that correspond to out-of-range indices (greater than 15) unmodified or to set them + ;; to 0. + (VecTbl + (rd WritableReg) + (rn Reg) + (rm Reg) + (is_extension bool)) + + ;; Table vector lookup - two register table. The table consists of 8-bit elements and is + ;; stored in `rn` and `rn2`, while `rm` contains 8-bit element indices. `is_extension` + ;; specifies whether to emit a TBX or a TBL instruction, i.e. whether to leave the elements in + ;; the destination vector that correspond to out-of-range indices (greater than 31) unmodified + ;; or to set them to 0. The table registers `rn` and `rn2` must have consecutive numbers + ;; modulo 32, that is v31 and v0 (in that order) are consecutive registers. + (VecTbl2 + (rd WritableReg) + (rn Reg) + (rn2 Reg) + (rm Reg) + (is_extension bool)) + + ;; Load an element and replicate to all lanes of a vector. + (VecLoadReplicate + (rd WritableReg) + (rn Reg) + (size VectorSize)) + + ;; Vector conditional select, 128 bit. A synthetic instruction, which generates a 4-insn + ;; control-flow diamond. + (VecCSel + (rd WritableReg) + (rn Reg) + (rm Reg) + (cond Cond)) + + ;; Move to the NZCV flags (actually a `MSR NZCV, Xn` insn). + (MovToNZCV + (rn Reg)) + + ;; Move from the NZCV flags (actually a `MRS Xn, NZCV` insn). + (MovFromNZCV + (rd WritableReg)) + + ;; A machine call instruction. N.B.: this allows only a +/- 128MB offset (it uses a relocation + ;; of type `Reloc::Arm64Call`); if the destination distance is not `RelocDistance::Near`, the + ;; code should use a `LoadExtName` / `CallInd` sequence instead, allowing an arbitrary 64-bit + ;; target. + (Call + (info BoxCallInfo)) + + ;; A machine indirect-call instruction. + (CallInd + (info BoxCallIndInfo)) + + ;; ---- branches (exactly one must appear at end of BB) ---- + + ;; A machine return instruction. + (Ret + (rets VecReg)) + + ;; A placeholder instruction, generating no code, meaning that a function epilogue must be + ;; inserted there. + (EpiloguePlaceholder) + + ;; An unconditional branch. + (Jump + (dest BranchTarget)) + + ;; A conditional branch. Contains two targets; at emission time, both are emitted, but + ;; the MachBuffer knows to truncate the trailing branch if fallthrough. We optimize the + ;; choice of taken/not_taken (inverting the branch polarity as needed) based on the + ;; fallthrough at the time of lowering. + (CondBr + (taken BranchTarget) + (not_taken BranchTarget) + (kind CondBrKind)) + + ;; A conditional trap: execute a `udf` if the condition is true. 
This is + ;; one VCode instruction because it uses embedded control flow; it is + ;; logically a single-in, single-out region, but needs to appear as one + ;; unit to the register allocator. + ;; + ;; The `CondBrKind` gives the conditional-branch condition that will + ;; *execute* the embedded `Inst`. (In the emitted code, we use the inverse + ;; of this condition in a branch that skips the trap instruction.) + (TrapIf + (kind CondBrKind) + (trap_code TrapCode)) + + ;; An indirect branch through a register, augmented with set of all + ;; possible successors. + (IndirectBr + (rn Reg) + (targets VecMachLabel)) + + ;; A "break" instruction, used for e.g. traps and debug breakpoints. + (Brk) + + ;; An instruction guaranteed to always be undefined and to trigger an illegal instruction at + ;; runtime. + (Udf + (trap_code TrapCode)) + + ;; Compute the address (using a PC-relative offset) of a memory location, using the `ADR` + ;; instruction. Note that we take a simple offset, not a `MemLabel`, here, because `Adr` is + ;; only used for now in fixed lowering sequences with hardcoded offsets. In the future we may + ;; need full `MemLabel` support. + (Adr + (rd WritableReg) + ;; Offset in range -2^20 .. 2^20. + (off i32)) + + ;; Raw 32-bit word, used for inline constants and jump-table entries. + (Word4 + (data u32)) + + ;; Raw 64-bit word, used for inline constants. + (Word8 + (data u64)) + + ;; Jump-table sequence, as one compound instruction (see note in lower_inst.rs for rationale). + (JTSequence + (info BoxJTSequenceInfo) + (ridx Reg) + (rtmp1 WritableReg) + (rtmp2 WritableReg)) + + ;; Load an inline symbol reference. + (LoadExtName + (rd WritableReg) + (name BoxExternalName) + (offset i64)) + + ;; Load address referenced by `mem` into `rd`. + (LoadAddr + (rd WritableReg) + (mem AMode)) + + ;; Marker, no-op in generated code: SP "virtual offset" is adjusted. This + ;; controls how AMode::NominalSPOffset args are lowered. + (VirtualSPOffsetAdj + (offset i64)) + + ;; Meta-insn, no-op in generated code: emit constant/branch veneer island + ;; at this point (with a guard jump around it) if less than the needed + ;; space is available before the next branch deadline. See the `MachBuffer` + ;; implementation in `machinst/buffer.rs` for the overall algorithm. In + ;; brief, we retain a set of "pending/unresolved label references" from + ;; branches as we scan forward through instructions to emit machine code; + ;; if we notice we're about to go out of range on an unresolved reference, + ;; we stop, emit a bunch of "veneers" (branches in a form that has a longer + ;; range, e.g. a 26-bit-offset unconditional jump), and point the original + ;; label references to those. This is an "island" because it comes in the + ;; middle of the code. + ;; + ;; This meta-instruction is a necessary part of the logic that determines + ;; where to place islands. Ordinarily, we want to place them between basic + ;; blocks, so we compute the worst-case size of each block, and emit the + ;; island before starting a block if we would exceed a deadline before the + ;; end of the block. However, some sequences (such as an inline jumptable) + ;; are variable-length and not accounted for by this logic; so these + ;; lowered sequences include an `EmitIsland` to trigger island generation + ;; where necessary. + (EmitIsland + ;; The needed space before the next deadline. + (needed_space CodeOffset)) + + ;; A call to the `ElfTlsGetAddr` libcall. Returns address of TLS symbol in x0. 
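+ ;; (Illustrative aside, not upstream text: like `JTSequence` above, this is
+ ;; a pseudo-instruction -- the ISLE variant only declares the operands, and
+ ;; the actual call sequence is produced at emission time in `emit.rs`.)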
+ (ElfTlsGetAddr + (symbol ExternalName)) + + ;; An unwind pseudo-instruction. + (Unwind + (inst UnwindInst)) + + ;; A dummy use, useful to keep a value alive. + (DummyUse + (reg Reg)))) ;; An ALU operation. This can be paired with several instruction formats ;; below (see `Inst`) in any combination. diff --git a/cranelift/codegen/src/isa/aarch64/inst/args.rs b/cranelift/codegen/src/isa/aarch64/inst/args.rs index 1a836f1a71..2a25c6702a 100644 --- a/cranelift/codegen/src/isa/aarch64/inst/args.rs +++ b/cranelift/codegen/src/isa/aarch64/inst/args.rs @@ -6,10 +6,7 @@ use crate::ir::types::*; use crate::ir::Type; use crate::isa::aarch64::inst::*; -use crate::machinst::{ty_bits, MachLabel}; - -use regalloc::{PrettyPrint, RealRegUniverse, Reg, Writable}; - +use crate::machinst::{ty_bits, MachLabel, PrettyPrint, Reg, Writable}; use core::convert::Into; use std::string::String; @@ -222,6 +219,29 @@ impl AMode { _ => None, } } + + pub fn with_allocs(&self, allocs: &mut AllocationConsumer<'_>) -> Self { + // This should match `memarg_operands()`. + match self { + &AMode::Unscaled(reg, imm9) => AMode::Unscaled(allocs.next(reg), imm9), + &AMode::UnsignedOffset(r, uimm12) => AMode::UnsignedOffset(allocs.next(r), uimm12), + &AMode::RegReg(r1, r2) => AMode::RegReg(allocs.next(r1), allocs.next(r2)), + &AMode::RegScaled(r1, r2, ty) => AMode::RegScaled(allocs.next(r1), allocs.next(r2), ty), + &AMode::RegScaledExtended(r1, r2, ty, ext) => { + AMode::RegScaledExtended(allocs.next(r1), allocs.next(r2), ty, ext) + } + &AMode::RegExtended(r1, r2, ext) => { + AMode::RegExtended(allocs.next(r1), allocs.next(r2), ext) + } + &AMode::PreIndexed(reg, simm9) => AMode::PreIndexed(allocs.next_writable(reg), simm9), + &AMode::PostIndexed(reg, simm9) => AMode::PostIndexed(allocs.next_writable(reg), simm9), + &AMode::RegOffset(r, off, ty) => AMode::RegOffset(allocs.next(r), off, ty), + &AMode::FPOffset(..) + | &AMode::SPOffset(..) + | &AMode::NominalSPOffset(..) + | AMode::Label(..) => self.clone(), + } + } } /// A memory argument to a load/store-pair. @@ -232,6 +252,23 @@ pub enum PairAMode { PostIndexed(Writable, SImm7Scaled), } +impl PairAMode { + pub fn with_allocs(&self, allocs: &mut AllocationConsumer<'_>) -> Self { + // Should match `pairmemarg_operands()`. 
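+        // (Illustrative note on the invariant, using only names from this
+        // patch: `pairmemarg_operands()` collects registers with
+        // `reg_use`/`reg_mod`, and regalloc2 hands back one `Allocation` per
+        // collected operand, in order. `with_allocs()` must therefore call
+        // `allocs.next(..)` in exactly the same order, or every later operand
+        // of the instruction would be rewritten to the wrong register.)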
+ match self { + &PairAMode::SignedOffset(reg, simm7scaled) => { + PairAMode::SignedOffset(allocs.next(reg), simm7scaled) + } + &PairAMode::PreIndexed(reg, simm7scaled) => { + PairAMode::PreIndexed(allocs.next_writable(reg), simm7scaled) + } + &PairAMode::PostIndexed(reg, simm7scaled) => { + PairAMode::PostIndexed(allocs.next_writable(reg), simm7scaled) + } + } + } +} + //============================================================================= // Instruction sub-components (conditions, branches and branch targets): // definitions @@ -362,19 +399,19 @@ impl BranchTarget { } impl PrettyPrint for ShiftOpAndAmt { - fn show_rru(&self, _mb_rru: Option<&RealRegUniverse>) -> String { + fn pretty_print(&self, _: u8, _: &mut AllocationConsumer<'_>) -> String { format!("{:?} {}", self.op(), self.amt().value()) } } impl PrettyPrint for ExtendOp { - fn show_rru(&self, _mb_rru: Option<&RealRegUniverse>) -> String { + fn pretty_print(&self, _: u8, _: &mut AllocationConsumer<'_>) -> String { format!("{:?}", self) } } impl PrettyPrint for MemLabel { - fn show_rru(&self, _mb_rru: Option<&RealRegUniverse>) -> String { + fn pretty_print(&self, _: u8, _: &mut AllocationConsumer<'_>) -> String { match self { &MemLabel::PCRel(off) => format!("pc+{}", off), } @@ -393,33 +430,36 @@ fn shift_for_type(ty: Type) -> usize { } impl PrettyPrint for AMode { - fn show_rru(&self, mb_rru: Option<&RealRegUniverse>) -> String { + fn pretty_print(&self, _: u8, allocs: &mut AllocationConsumer<'_>) -> String { match self { &AMode::Unscaled(reg, simm9) => { + let reg = pretty_print_reg(reg, allocs); if simm9.value != 0 { - format!("[{}, {}]", reg.show_rru(mb_rru), simm9.show_rru(mb_rru)) + let simm9 = simm9.pretty_print(8, allocs); + format!("[{}, {}]", reg, simm9) } else { - format!("[{}]", reg.show_rru(mb_rru)) + format!("[{}]", reg) } } &AMode::UnsignedOffset(reg, uimm12) => { + let reg = pretty_print_reg(reg, allocs); if uimm12.value != 0 { - format!("[{}, {}]", reg.show_rru(mb_rru), uimm12.show_rru(mb_rru)) + let uimm12 = uimm12.pretty_print(8, allocs); + format!("[{}, {}]", reg, uimm12) } else { - format!("[{}]", reg.show_rru(mb_rru)) + format!("[{}]", reg) } } &AMode::RegReg(r1, r2) => { - format!("[{}, {}]", r1.show_rru(mb_rru), r2.show_rru(mb_rru),) + let r1 = pretty_print_reg(r1, allocs); + let r2 = pretty_print_reg(r2, allocs); + format!("[{}, {}]", r1, r2) } &AMode::RegScaled(r1, r2, ty) => { + let r1 = pretty_print_reg(r1, allocs); + let r2 = pretty_print_reg(r2, allocs); let shift = shift_for_type(ty); - format!( - "[{}, {}, LSL #{}]", - r1.show_rru(mb_rru), - r2.show_rru(mb_rru), - shift, - ) + format!("[{}, {}, LSL #{}]", r1, r2, shift) } &AMode::RegScaledExtended(r1, r2, ty, op) => { let shift = shift_for_type(ty); @@ -427,39 +467,32 @@ impl PrettyPrint for AMode { ExtendOp::SXTW | ExtendOp::UXTW => OperandSize::Size32, _ => OperandSize::Size64, }; - let op = op.show_rru(mb_rru); - format!( - "[{}, {}, {} #{}]", - r1.show_rru(mb_rru), - show_ireg_sized(r2, mb_rru, size), - op, - shift - ) + let r1 = pretty_print_reg(r1, allocs); + let r2 = pretty_print_ireg(r2, size, allocs); + let op = op.pretty_print(0, allocs); + format!("[{}, {}, {} #{}]", r1, r2, op, shift) } &AMode::RegExtended(r1, r2, op) => { let size = match op { ExtendOp::SXTW | ExtendOp::UXTW => OperandSize::Size32, _ => OperandSize::Size64, }; - let op = op.show_rru(mb_rru); - format!( - "[{}, {}, {}]", - r1.show_rru(mb_rru), - show_ireg_sized(r2, mb_rru, size), - op, - ) + let r1 = pretty_print_reg(r1, allocs); + let r2 = pretty_print_ireg(r2, 
size, allocs); + let op = op.pretty_print(0, allocs); + format!("[{}, {}, {}]", r1, r2, op) + } + &AMode::Label(ref label) => label.pretty_print(0, allocs), + &AMode::PreIndexed(r, simm9) => { + let r = pretty_print_reg(r.to_reg(), allocs); + let simm9 = simm9.pretty_print(8, allocs); + format!("[{}, {}]!", r, simm9) + } + &AMode::PostIndexed(r, simm9) => { + let r = pretty_print_reg(r.to_reg(), allocs); + let simm9 = simm9.pretty_print(8, allocs); + format!("[{}], {}", r, simm9) } - &AMode::Label(ref label) => label.show_rru(mb_rru), - &AMode::PreIndexed(r, simm9) => format!( - "[{}, {}]!", - r.to_reg().show_rru(mb_rru), - simm9.show_rru(mb_rru) - ), - &AMode::PostIndexed(r, simm9) => format!( - "[{}], {}", - r.to_reg().show_rru(mb_rru), - simm9.show_rru(mb_rru) - ), // Eliminated by `mem_finalize()`. &AMode::SPOffset(..) | &AMode::FPOffset(..) @@ -472,31 +505,33 @@ impl PrettyPrint for AMode { } impl PrettyPrint for PairAMode { - fn show_rru(&self, mb_rru: Option<&RealRegUniverse>) -> String { + fn pretty_print(&self, _: u8, allocs: &mut AllocationConsumer<'_>) -> String { match self { &PairAMode::SignedOffset(reg, simm7) => { + let reg = pretty_print_reg(reg, allocs); if simm7.value != 0 { - format!("[{}, {}]", reg.show_rru(mb_rru), simm7.show_rru(mb_rru)) + let simm7 = simm7.pretty_print(8, allocs); + format!("[{}, {}]", reg, simm7) } else { - format!("[{}]", reg.show_rru(mb_rru)) + format!("[{}]", reg) } } - &PairAMode::PreIndexed(reg, simm7) => format!( - "[{}, {}]!", - reg.to_reg().show_rru(mb_rru), - simm7.show_rru(mb_rru) - ), - &PairAMode::PostIndexed(reg, simm7) => format!( - "[{}], {}", - reg.to_reg().show_rru(mb_rru), - simm7.show_rru(mb_rru) - ), + &PairAMode::PreIndexed(reg, simm7) => { + let reg = pretty_print_reg(reg.to_reg(), allocs); + let simm7 = simm7.pretty_print(8, allocs); + format!("[{}, {}]!", reg, simm7) + } + &PairAMode::PostIndexed(reg, simm7) => { + let reg = pretty_print_reg(reg.to_reg(), allocs); + let simm7 = simm7.pretty_print(8, allocs); + format!("[{}], {}", reg, simm7) + } } } } impl PrettyPrint for Cond { - fn show_rru(&self, _mb_rru: Option<&RealRegUniverse>) -> String { + fn pretty_print(&self, _: u8, _: &mut AllocationConsumer<'_>) -> String { let mut s = format!("{:?}", self); s.make_ascii_lowercase(); s @@ -504,7 +539,7 @@ impl PrettyPrint for Cond { } impl PrettyPrint for BranchTarget { - fn show_rru(&self, _mb_rru: Option<&RealRegUniverse>) -> String { + fn pretty_print(&self, _: u8, _: &mut AllocationConsumer<'_>) -> String { match self { &BranchTarget::Label(label) => format!("label{:?}", label.get()), &BranchTarget::ResolvedOffset(off) => format!("{}", off), diff --git a/cranelift/codegen/src/isa/aarch64/inst/emit.rs b/cranelift/codegen/src/isa/aarch64/inst/emit.rs index 560a5b5d77..d354e00b75 100644 --- a/cranelift/codegen/src/isa/aarch64/inst/emit.rs +++ b/cranelift/codegen/src/isa/aarch64/inst/emit.rs @@ -1,14 +1,13 @@ //! AArch64 ISA: binary code emission. 
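+//
+// (Illustrative overview added for exposition, not upstream text: with
+// regalloc2, `Inst::emit` receives this instruction's `Allocation` slice and
+// wraps it in an `AllocationConsumer`; each operand is then rewritten to its
+// assigned register in the same order `aarch64_get_operands` collected it,
+// e.g.
+//
+//     let mut allocs = AllocationConsumer::new(allocs);
+//     let rd = allocs.next_writable(rd); // def
+//     let rn = allocs.next(rn);          // use
+//
+// Helper instructions built internally from real registers are emitted with
+// an empty slice, `inst.emit(&[], sink, emit_info, state)`, since they carry
+// no operands left to rewrite.)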
+use regalloc2::Allocation; + use crate::binemit::{CodeOffset, Reloc, StackMap}; use crate::ir::constant::ConstantData; use crate::ir::types::*; use crate::ir::{LibCall, MemFlags, TrapCode}; use crate::isa::aarch64::inst::*; -use crate::machinst::ty_bits; - -use regalloc::{Reg, RegClass, Writable}; - +use crate::machinst::{ty_bits, Reg, RegClass, Writable}; use core::convert::TryFrom; /// Memory label/reference finalization: convert a MemLabel to a PC-relative @@ -98,17 +97,17 @@ pub fn u64_constant(bits: u64) -> ConstantData { // Instructions and subcomponents: emission fn machreg_to_gpr(m: Reg) -> u32 { - assert_eq!(m.get_class(), RegClass::I64); - u32::try_from(m.to_real_reg().get_hw_encoding()).unwrap() + assert_eq!(m.class(), RegClass::Int); + u32::try_from(m.to_real_reg().unwrap().hw_enc() & 31).unwrap() } fn machreg_to_vec(m: Reg) -> u32 { - assert_eq!(m.get_class(), RegClass::V128); - u32::try_from(m.to_real_reg().get_hw_encoding()).unwrap() + assert_eq!(m.class(), RegClass::Float); + u32::try_from(m.to_real_reg().unwrap().hw_enc()).unwrap() } fn machreg_to_gpr_or_vec(m: Reg) -> u32 { - u32::try_from(m.to_real_reg().get_hw_encoding()).unwrap() + u32::try_from(m.to_real_reg().unwrap().hw_enc() & 31).unwrap() } pub(crate) fn enc_arith_rrr( @@ -168,10 +167,20 @@ fn enc_cbr(op_31_24: u32, off_18_0: u32, op_4: u32, cond: u32) -> u32 { (op_31_24 << 24) | (off_18_0 << 5) | (op_4 << 4) | cond } -fn enc_conditional_br(taken: BranchTarget, kind: CondBrKind) -> u32 { +fn enc_conditional_br( + taken: BranchTarget, + kind: CondBrKind, + allocs: &mut AllocationConsumer<'_>, +) -> u32 { match kind { - CondBrKind::Zero(reg) => enc_cmpbr(0b1_011010_0, taken.as_offset19_or_zero(), reg), - CondBrKind::NotZero(reg) => enc_cmpbr(0b1_011010_1, taken.as_offset19_or_zero(), reg), + CondBrKind::Zero(reg) => { + let reg = allocs.next(reg); + enc_cmpbr(0b1_011010_0, taken.as_offset19_or_zero(), reg) + } + CondBrKind::NotZero(reg) => { + let reg = allocs.next(reg); + enc_cmpbr(0b1_011010_1, taken.as_offset19_or_zero(), reg) + } CondBrKind::Cond(c) => enc_cbr(0b01010100, taken.as_offset19_or_zero(), 0b0, c.bits()), } } @@ -674,7 +683,15 @@ impl MachInstEmit for Inst { type State = EmitState; type Info = EmitInfo; - fn emit(&self, sink: &mut MachBuffer, emit_info: &Self::Info, state: &mut EmitState) { + fn emit( + &self, + allocs: &[Allocation], + sink: &mut MachBuffer, + emit_info: &Self::Info, + state: &mut EmitState, + ) { + let mut allocs = AllocationConsumer::new(allocs); + // N.B.: we *must* not exceed the "worst-case size" used to compute // where to insert islands, except when islands are explicitly triggered // (with an `EmitIsland`). We check this in debug builds. 
This is `mut` @@ -690,6 +707,10 @@ impl MachInstEmit for Inst { rn, rm, } => { + let rd = allocs.next_writable(rd); + let rn = allocs.next(rn); + let rm = allocs.next(rm); + debug_assert!(match alu_op { ALUOp::SDiv | ALUOp::UDiv | ALUOp::SMulH | ALUOp::UMulH => size == OperandSize::Size64, @@ -742,6 +763,11 @@ impl MachInstEmit for Inst { rn, ra, } => { + let rd = allocs.next_writable(rd); + let rn = allocs.next(rn); + let rm = allocs.next(rm); + let ra = allocs.next(ra); + let (top11, bit15) = match alu_op { ALUOp3::MAdd32 => (0b0_00_11011_000, 0), ALUOp3::MSub32 => (0b0_00_11011_000, 1), @@ -757,6 +783,8 @@ impl MachInstEmit for Inst { rn, ref imm12, } => { + let rd = allocs.next_writable(rd); + let rn = allocs.next(rn); let top8 = match alu_op { ALUOp::Add => 0b000_10001, ALUOp::Sub => 0b010_10001, @@ -780,6 +808,8 @@ impl MachInstEmit for Inst { rn, ref imml, } => { + let rd = allocs.next_writable(rd); + let rn = allocs.next(rn); let (top9, inv) = match alu_op { ALUOp::Orr => (0b001_100100, false), ALUOp::And => (0b000_100100, false), @@ -802,6 +832,8 @@ impl MachInstEmit for Inst { rn, ref immshift, } => { + let rd = allocs.next_writable(rd); + let rn = allocs.next(rn); let amt = immshift.value(); let (top10, immr, imms) = match alu_op { ALUOp::RotR => (0b0001001110, machreg_to_gpr(rn), u32::from(amt)), @@ -839,6 +871,9 @@ impl MachInstEmit for Inst { rm, ref shiftop, } => { + let rd = allocs.next_writable(rd); + let rn = allocs.next(rn); + let rm = allocs.next(rm); let top11: u32 = match alu_op { ALUOp::Add => 0b000_01011000, ALUOp::AddS => 0b001_01011000, @@ -867,6 +902,9 @@ impl MachInstEmit for Inst { rm, extendop, } => { + let rd = allocs.next_writable(rd); + let rn = allocs.next(rn); + let rm = allocs.next(rm); let top11: u32 = match alu_op { ALUOp::Add => 0b00001011001, ALUOp::Sub => 0b01001011001, @@ -882,6 +920,8 @@ impl MachInstEmit for Inst { &Inst::BitRR { op, size, rd, rn, .. } => { + let rd = allocs.next_writable(rd); + let rn = allocs.next(rn); let (op1, op2) = match op { BitOp::RBit => (0b00000, 0b000000), BitOp::Clz => (0b00000, 0b000100), @@ -902,10 +942,12 @@ impl MachInstEmit for Inst { | &Inst::FpuLoad32 { rd, ref mem, flags } | &Inst::FpuLoad64 { rd, ref mem, flags } | &Inst::FpuLoad128 { rd, ref mem, flags } => { - let (mem_insts, mem) = mem_finalize(sink.cur_offset(), mem, state); + let rd = allocs.next_writable(rd); + let mem = mem.with_allocs(&mut allocs); + let (mem_insts, mem) = mem_finalize(sink.cur_offset(), &mem, state); for inst in mem_insts.into_iter() { - inst.emit(sink, emit_info, state); + inst.emit(&[], sink, emit_info, state); } // ldst encoding helpers take Reg, not Writable. @@ -936,20 +978,26 @@ impl MachInstEmit for Inst { match &mem { &AMode::Unscaled(reg, simm9) => { + let reg = allocs.next(reg); sink.put4(enc_ldst_simm9(op, simm9, 0b00, reg, rd)); } &AMode::UnsignedOffset(reg, uimm12scaled) => { + let reg = allocs.next(reg); if uimm12scaled.value() != 0 { assert_eq!(bits, ty_bits(uimm12scaled.scale_ty())); } sink.put4(enc_ldst_uimm12(op, uimm12scaled, reg, rd)); } &AMode::RegReg(r1, r2) => { + let r1 = allocs.next(r1); + let r2 = allocs.next(r2); sink.put4(enc_ldst_reg( op, r1, r2, /* scaled = */ false, /* extendop = */ None, rd, )); } &AMode::RegScaled(r1, r2, ty) | &AMode::RegScaledExtended(r1, r2, ty, _) => { + let r1 = allocs.next(r1); + let r2 = allocs.next(r2); assert_eq!(bits, ty_bits(ty)); let extendop = match &mem { &AMode::RegScaled(..) 
=> None, @@ -961,6 +1009,8 @@ impl MachInstEmit for Inst { )); } &AMode::RegExtended(r1, r2, extendop) => { + let r1 = allocs.next(r1); + let r2 = allocs.next(r2); sink.put4(enc_ldst_reg( op, r1, @@ -999,10 +1049,12 @@ impl MachInstEmit for Inst { } } &AMode::PreIndexed(reg, simm9) => { - sink.put4(enc_ldst_simm9(op, simm9, 0b11, reg.to_reg(), rd)); + let reg = allocs.next(reg.to_reg()); + sink.put4(enc_ldst_simm9(op, simm9, 0b11, reg, rd)); } &AMode::PostIndexed(reg, simm9) => { - sink.put4(enc_ldst_simm9(op, simm9, 0b01, reg.to_reg(), rd)); + let reg = allocs.next(reg.to_reg()); + sink.put4(enc_ldst_simm9(op, simm9, 0b01, reg, rd)); } // Eliminated by `mem_finalize()` above. &AMode::SPOffset(..) | &AMode::FPOffset(..) | &AMode::NominalSPOffset(..) => { @@ -1019,10 +1071,12 @@ impl MachInstEmit for Inst { | &Inst::FpuStore32 { rd, ref mem, flags } | &Inst::FpuStore64 { rd, ref mem, flags } | &Inst::FpuStore128 { rd, ref mem, flags } => { - let (mem_insts, mem) = mem_finalize(sink.cur_offset(), mem, state); + let rd = allocs.next(rd); + let mem = mem.with_allocs(&mut allocs); + let (mem_insts, mem) = mem_finalize(sink.cur_offset(), &mem, state); for inst in mem_insts.into_iter() { - inst.emit(sink, emit_info, state); + inst.emit(&[], sink, emit_info, state); } let (op, bits) = match self { @@ -1044,20 +1098,26 @@ impl MachInstEmit for Inst { match &mem { &AMode::Unscaled(reg, simm9) => { + let reg = allocs.next(reg); sink.put4(enc_ldst_simm9(op, simm9, 0b00, reg, rd)); } &AMode::UnsignedOffset(reg, uimm12scaled) => { + let reg = allocs.next(reg); if uimm12scaled.value() != 0 { assert_eq!(bits, ty_bits(uimm12scaled.scale_ty())); } sink.put4(enc_ldst_uimm12(op, uimm12scaled, reg, rd)); } &AMode::RegReg(r1, r2) => { + let r1 = allocs.next(r1); + let r2 = allocs.next(r2); sink.put4(enc_ldst_reg( op, r1, r2, /* scaled = */ false, /* extendop = */ None, rd, )); } &AMode::RegScaled(r1, r2, _ty) | &AMode::RegScaledExtended(r1, r2, _ty, _) => { + let r1 = allocs.next(r1); + let r2 = allocs.next(r2); let extendop = match &mem { &AMode::RegScaled(..) => None, &AMode::RegScaledExtended(_, _, _, op) => Some(op), @@ -1068,6 +1128,8 @@ impl MachInstEmit for Inst { )); } &AMode::RegExtended(r1, r2, extendop) => { + let r1 = allocs.next(r1); + let r2 = allocs.next(r2); sink.put4(enc_ldst_reg( op, r1, @@ -1081,10 +1143,12 @@ impl MachInstEmit for Inst { panic!("Store to a MemLabel not implemented!"); } &AMode::PreIndexed(reg, simm9) => { - sink.put4(enc_ldst_simm9(op, simm9, 0b11, reg.to_reg(), rd)); + let reg = allocs.next(reg.to_reg()); + sink.put4(enc_ldst_simm9(op, simm9, 0b11, reg, rd)); } &AMode::PostIndexed(reg, simm9) => { - sink.put4(enc_ldst_simm9(op, simm9, 0b01, reg.to_reg(), rd)); + let reg = allocs.next(reg.to_reg()); + sink.put4(enc_ldst_simm9(op, simm9, 0b01, reg, rd)); } // Eliminated by `mem_finalize()` above. &AMode::SPOffset(..) | &AMode::FPOffset(..) | &AMode::NominalSPOffset(..) => { @@ -1100,23 +1164,29 @@ impl MachInstEmit for Inst { ref mem, flags, } => { + let rt = allocs.next(rt); + let rt2 = allocs.next(rt2); + let mem = mem.with_allocs(&mut allocs); let srcloc = state.cur_srcloc(); if srcloc != SourceLoc::default() && !flags.notrap() { // Register the offset at which the actual store instruction starts. 
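+                // (Illustrative note, not upstream text: the trap record maps
+                // a hardware fault at this exact code offset back to a wasm
+                // `HeapOutOfBounds` trap, which is why it is registered
+                // immediately before the `put4` that emits the store itself.)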
sink.add_trap(srcloc, TrapCode::HeapOutOfBounds); } - match mem { + match &mem { &PairAMode::SignedOffset(reg, simm7) => { assert_eq!(simm7.scale_ty, I64); + let reg = allocs.next(reg); sink.put4(enc_ldst_pair(0b1010100100, simm7, reg, rt, rt2)); } &PairAMode::PreIndexed(reg, simm7) => { assert_eq!(simm7.scale_ty, I64); - sink.put4(enc_ldst_pair(0b1010100110, simm7, reg.to_reg(), rt, rt2)); + let reg = allocs.next(reg.to_reg()); + sink.put4(enc_ldst_pair(0b1010100110, simm7, reg, rt, rt2)); } &PairAMode::PostIndexed(reg, simm7) => { assert_eq!(simm7.scale_ty, I64); - sink.put4(enc_ldst_pair(0b1010100010, simm7, reg.to_reg(), rt, rt2)); + let reg = allocs.next(reg.to_reg()); + sink.put4(enc_ldst_pair(0b1010100010, simm7, reg, rt, rt2)); } } } @@ -1126,26 +1196,30 @@ impl MachInstEmit for Inst { ref mem, flags, } => { + let rt = allocs.next(rt.to_reg()); + let rt2 = allocs.next(rt2.to_reg()); + let mem = mem.with_allocs(&mut allocs); let srcloc = state.cur_srcloc(); if srcloc != SourceLoc::default() && !flags.notrap() { // Register the offset at which the actual load instruction starts. sink.add_trap(srcloc, TrapCode::HeapOutOfBounds); } - let rt = rt.to_reg(); - let rt2 = rt2.to_reg(); - match mem { + match &mem { &PairAMode::SignedOffset(reg, simm7) => { assert_eq!(simm7.scale_ty, I64); + let reg = allocs.next(reg); sink.put4(enc_ldst_pair(0b1010100101, simm7, reg, rt, rt2)); } &PairAMode::PreIndexed(reg, simm7) => { assert_eq!(simm7.scale_ty, I64); - sink.put4(enc_ldst_pair(0b1010100111, simm7, reg.to_reg(), rt, rt2)); + let reg = allocs.next(reg.to_reg()); + sink.put4(enc_ldst_pair(0b1010100111, simm7, reg, rt, rt2)); } &PairAMode::PostIndexed(reg, simm7) => { assert_eq!(simm7.scale_ty, I64); - sink.put4(enc_ldst_pair(0b1010100011, simm7, reg.to_reg(), rt, rt2)); + let reg = allocs.next(reg.to_reg()); + sink.put4(enc_ldst_pair(0b1010100011, simm7, reg, rt, rt2)); } } } @@ -1161,6 +1235,9 @@ impl MachInstEmit for Inst { ref mem, flags, } => { + let rt = allocs.next(rt.to_reg()); + let rt2 = allocs.next(rt2.to_reg()); + let mem = mem.with_allocs(&mut allocs); let srcloc = state.cur_srcloc(); if srcloc != SourceLoc::default() && !flags.notrap() { @@ -1173,37 +1250,22 @@ impl MachInstEmit for Inst { &Inst::FpuLoadP128 { .. 
} => 0b10, _ => unreachable!(), }; - let rt = rt.to_reg(); - let rt2 = rt2.to_reg(); - match mem { + match &mem { &PairAMode::SignedOffset(reg, simm7) => { assert!(simm7.scale_ty == F64 || simm7.scale_ty == I8X16); + let reg = allocs.next(reg); sink.put4(enc_ldst_vec_pair(opc, 0b10, true, simm7, reg, rt, rt2)); } &PairAMode::PreIndexed(reg, simm7) => { assert!(simm7.scale_ty == F64 || simm7.scale_ty == I8X16); - sink.put4(enc_ldst_vec_pair( - opc, - 0b11, - true, - simm7, - reg.to_reg(), - rt, - rt2, - )); + let reg = allocs.next(reg.to_reg()); + sink.put4(enc_ldst_vec_pair(opc, 0b11, true, simm7, reg, rt, rt2)); } &PairAMode::PostIndexed(reg, simm7) => { assert!(simm7.scale_ty == F64 || simm7.scale_ty == I8X16); - sink.put4(enc_ldst_vec_pair( - opc, - 0b01, - true, - simm7, - reg.to_reg(), - rt, - rt2, - )); + let reg = allocs.next(reg.to_reg()); + sink.put4(enc_ldst_vec_pair(opc, 0b01, true, simm7, reg, rt, rt2)); } } } @@ -1219,6 +1281,9 @@ impl MachInstEmit for Inst { ref mem, flags, } => { + let rt = allocs.next(rt); + let rt2 = allocs.next(rt2); + let mem = mem.with_allocs(&mut allocs); let srcloc = state.cur_srcloc(); if srcloc != SourceLoc::default() && !flags.notrap() { @@ -1232,40 +1297,29 @@ impl MachInstEmit for Inst { _ => unreachable!(), }; - match mem { + match &mem { &PairAMode::SignedOffset(reg, simm7) => { assert!(simm7.scale_ty == F64 || simm7.scale_ty == I8X16); + let reg = allocs.next(reg); sink.put4(enc_ldst_vec_pair(opc, 0b10, false, simm7, reg, rt, rt2)); } &PairAMode::PreIndexed(reg, simm7) => { assert!(simm7.scale_ty == F64 || simm7.scale_ty == I8X16); - sink.put4(enc_ldst_vec_pair( - opc, - 0b11, - false, - simm7, - reg.to_reg(), - rt, - rt2, - )); + let reg = allocs.next(reg.to_reg()); + sink.put4(enc_ldst_vec_pair(opc, 0b11, false, simm7, reg, rt, rt2)); } &PairAMode::PostIndexed(reg, simm7) => { assert!(simm7.scale_ty == F64 || simm7.scale_ty == I8X16); - sink.put4(enc_ldst_vec_pair( - opc, - 0b01, - false, - simm7, - reg.to_reg(), - rt, - rt2, - )); + let reg = allocs.next(reg.to_reg()); + sink.put4(enc_ldst_vec_pair(opc, 0b01, false, simm7, reg, rt, rt2)); } } } &Inst::Mov64 { rd, rm } => { - assert!(rd.to_reg().get_class() == rm.get_class()); - assert!(rm.get_class() == RegClass::I64); + let rd = allocs.next_writable(rd); + let rm = allocs.next(rm); + assert!(rd.to_reg().class() == rm.class()); + assert!(rm.class() == RegClass::Int); // MOV to SP is interpreted as MOV to XZR instead. And our codegen // should never MOV to XZR. @@ -1287,6 +1341,8 @@ impl MachInstEmit for Inst { } } &Inst::Mov32 { rd, rm } => { + let rd = allocs.next_writable(rd); + let rm = allocs.next(rm); // MOV to SP is interpreted as MOV to XZR instead. And our codegen // should never MOV to XZR. 
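+                // (Illustrative note, not upstream text: the mov is encoded
+                // as `ORR rd, xzr, rm`, and in that encoding register number
+                // 31 names XZR, not SP -- so a destination allocated to SP
+                // would silently discard the value. Moves to or from SP must
+                // use the `ADD rd, rn, #0` alias instead; the assert below
+                // enforces this.)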
assert!(machreg_to_gpr(rd.to_reg()) != 31); @@ -1294,21 +1350,29 @@ impl MachInstEmit for Inst { sink.put4(enc_arith_rrr(0b00101010_000, 0b000_000, rd, zero_reg(), rm)); } &Inst::MovZ { rd, imm, size } => { + let rd = allocs.next_writable(rd); sink.put4(enc_move_wide(MoveWideOpcode::MOVZ, rd, imm, size)) } &Inst::MovN { rd, imm, size } => { + let rd = allocs.next_writable(rd); sink.put4(enc_move_wide(MoveWideOpcode::MOVN, rd, imm, size)) } &Inst::MovK { rd, imm, size } => { + let rd = allocs.next_writable(rd); sink.put4(enc_move_wide(MoveWideOpcode::MOVK, rd, imm, size)) } &Inst::CSel { rd, rn, rm, cond } => { + let rd = allocs.next_writable(rd); + let rn = allocs.next(rn); + let rm = allocs.next(rm); sink.put4(enc_csel(rd, rn, rm, cond)); } &Inst::CSet { rd, cond } => { + let rd = allocs.next_writable(rd); sink.put4(enc_cset(rd, cond)); } &Inst::CSetm { rd, cond } => { + let rd = allocs.next_writable(rd); sink.put4(enc_csetm(rd, cond)); } &Inst::CCmpImm { @@ -1318,9 +1382,13 @@ impl MachInstEmit for Inst { nzcv, cond, } => { + let rn = allocs.next(rn); sink.put4(enc_ccmp_imm(size, rn, imm, nzcv, cond)); } &Inst::AtomicRMW { ty, op, rs, rt, rn } => { + let rs = allocs.next(rs); + let rt = allocs.next_writable(rt); + let rn = allocs.next(rn); sink.put4(enc_ldal(ty, op, rs, rt, rn)); } &Inst::AtomicRMWLoop { ty, op } => { @@ -1375,7 +1443,7 @@ impl MachInstEmit for Inst { rn: x27, rm: x26, } - .emit(sink, emit_info, state); + .emit(&[], sink, emit_info, state); Inst::AluRRR { alu_op: ALUOp::OrrNot, @@ -1384,7 +1452,7 @@ impl MachInstEmit for Inst { rn: xzr, rm: x28, } - .emit(sink, emit_info, state); + .emit(&[], sink, emit_info, state); } AtomicRmwOp::Umin | AtomicRmwOp::Umax @@ -1408,7 +1476,7 @@ impl MachInstEmit for Inst { rn: x27, rm: x26, } - .emit(sink, emit_info, state); + .emit(&[], sink, emit_info, state); Inst::CSel { cond, @@ -1416,7 +1484,7 @@ impl MachInstEmit for Inst { rn: x27, rm: x26, } - .emit(sink, emit_info, state); + .emit(&[], sink, emit_info, state); } _ => { // add/sub/and/orr/eor x28, x27, x26 @@ -1441,7 +1509,7 @@ impl MachInstEmit for Inst { rn: x27, rm: x26, } - .emit(sink, emit_info, state); + .emit(&[], sink, emit_info, state); } } @@ -1462,10 +1530,14 @@ impl MachInstEmit for Inst { sink.put4(enc_conditional_br( BranchTarget::Label(again_label), CondBrKind::NotZero(x24), + &mut AllocationConsumer::default(), )); sink.use_label_at_offset(br_offset, again_label, LabelUse::Branch19); } &Inst::AtomicCAS { rs, rt, rn, ty } => { + let rs = allocs.next_writable(rs); + let rt = allocs.next(rt); + let rn = allocs.next(rn); let size = match ty { I8 => 0b00, I16 => 0b01, @@ -1526,6 +1598,7 @@ impl MachInstEmit for Inst { sink.put4(enc_conditional_br( BranchTarget::Label(out_label), CondBrKind::Cond(Cond::Ne), + &mut AllocationConsumer::default(), )); sink.use_label_at_offset(br_out_offset, out_label, LabelUse::Branch19); @@ -1542,6 +1615,7 @@ impl MachInstEmit for Inst { sink.put4(enc_conditional_br( BranchTarget::Label(again_label), CondBrKind::NotZero(x24), + &mut AllocationConsumer::default(), )); sink.use_label_at_offset(br_again_offset, again_label, LabelUse::Branch19); @@ -1549,21 +1623,31 @@ impl MachInstEmit for Inst { sink.bind_label(out_label); } &Inst::LoadAcquire { access_ty, rt, rn } => { + let rn = allocs.next(rn); + let rt = allocs.next_writable(rt); sink.put4(enc_ldar(access_ty, rt, rn)); } &Inst::StoreRelease { access_ty, rt, rn } => { + let rn = allocs.next(rn); + let rt = allocs.next(rt); sink.put4(enc_stlr(access_ty, rt, rn)); } &Inst::Fence {} => { 
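+                // (Illustrative note, not upstream text: `Fence` lowers to a
+                // single `dmb ish`, a data memory barrier over the inner-
+                // shareable domain that orders both loads and stores. Its
+                // fixed encoding is 0xD5033BBF; see the `"BF3B03D5"` byte
+                // string in the emit tests below.)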
sink.put4(enc_dmb_ish()); // dmb ish } &Inst::FpuMove64 { rd, rn } => { + let rd = allocs.next_writable(rd); + let rn = allocs.next(rn); sink.put4(enc_fpurr(0b000_11110_01_1_000000_10000, rd, rn)); } &Inst::FpuMove128 { rd, rn } => { + let rd = allocs.next_writable(rd); + let rn = allocs.next(rn); sink.put4(enc_vecmov(/* 16b = */ true, rd, rn)); } &Inst::FpuMoveFromVec { rd, rn, idx, size } => { + let rd = allocs.next_writable(rd); + let rn = allocs.next(rn); let (imm5, shift, mask) = match size.lane_size() { ScalarSize::Size32 => (0b00100, 3, 0b011), ScalarSize::Size64 => (0b01000, 4, 0b001), @@ -1579,6 +1663,8 @@ impl MachInstEmit for Inst { ); } &Inst::FpuExtend { rd, rn, size } => { + let rd = allocs.next_writable(rd); + let rn = allocs.next(rn); sink.put4(enc_fpurr( 0b000_11110_00_1_000000_10000 | (size.ftype() << 13), rd, @@ -1586,6 +1672,8 @@ impl MachInstEmit for Inst { )); } &Inst::FpuRR { fpu_op, rd, rn } => { + let rd = allocs.next_writable(rd); + let rn = allocs.next(rn); let top22 = match fpu_op { FPUOp1::Abs32 => 0b000_11110_00_1_000001_10000, FPUOp1::Abs64 => 0b000_11110_01_1_000001_10000, @@ -1599,6 +1687,9 @@ impl MachInstEmit for Inst { sink.put4(enc_fpurr(top22, rd, rn)); } &Inst::FpuRRR { fpu_op, rd, rn, rm } => { + let rd = allocs.next_writable(rd); + let rn = allocs.next(rn); + let rm = allocs.next(rm); let top22 = match fpu_op { FPUOp2::Add32 => 0b000_11110_00_1_00000_001010, FPUOp2::Add64 => 0b000_11110_01_1_00000_001010, @@ -1619,44 +1710,48 @@ impl MachInstEmit for Inst { }; sink.put4(enc_fpurrr(top22, rd, rn, rm)); } - &Inst::FpuRRI { fpu_op, rd, rn } => match fpu_op { - FPUOpRI::UShr32(imm) => { - debug_assert_eq!(32, imm.lane_size_in_bits); - sink.put4( - 0b0_0_1_011110_0000000_00_0_0_0_1_00000_00000 - | imm.enc() << 16 - | machreg_to_vec(rn) << 5 - | machreg_to_vec(rd.to_reg()), - ) + &Inst::FpuRRI { fpu_op, rd, rn } => { + let rd = allocs.next_writable(rd); + let rn = allocs.next(rn); + match fpu_op { + FPUOpRI::UShr32(imm) => { + debug_assert_eq!(32, imm.lane_size_in_bits); + sink.put4( + 0b0_0_1_011110_0000000_00_0_0_0_1_00000_00000 + | imm.enc() << 16 + | machreg_to_vec(rn) << 5 + | machreg_to_vec(rd.to_reg()), + ) + } + FPUOpRI::UShr64(imm) => { + debug_assert_eq!(64, imm.lane_size_in_bits); + sink.put4( + 0b01_1_111110_0000000_00_0_0_0_1_00000_00000 + | imm.enc() << 16 + | machreg_to_vec(rn) << 5 + | machreg_to_vec(rd.to_reg()), + ) + } + FPUOpRI::Sli64(imm) => { + debug_assert_eq!(64, imm.lane_size_in_bits); + sink.put4( + 0b01_1_111110_0000000_010101_00000_00000 + | imm.enc() << 16 + | machreg_to_vec(rn) << 5 + | machreg_to_vec(rd.to_reg()), + ) + } + FPUOpRI::Sli32(imm) => { + debug_assert_eq!(32, imm.lane_size_in_bits); + sink.put4( + 0b0_0_1_011110_0000000_010101_00000_00000 + | imm.enc() << 16 + | machreg_to_vec(rn) << 5 + | machreg_to_vec(rd.to_reg()), + ) + } } - FPUOpRI::UShr64(imm) => { - debug_assert_eq!(64, imm.lane_size_in_bits); - sink.put4( - 0b01_1_111110_0000000_00_0_0_0_1_00000_00000 - | imm.enc() << 16 - | machreg_to_vec(rn) << 5 - | machreg_to_vec(rd.to_reg()), - ) - } - FPUOpRI::Sli64(imm) => { - debug_assert_eq!(64, imm.lane_size_in_bits); - sink.put4( - 0b01_1_111110_0000000_010101_00000_00000 - | imm.enc() << 16 - | machreg_to_vec(rn) << 5 - | machreg_to_vec(rd.to_reg()), - ) - } - FPUOpRI::Sli32(imm) => { - debug_assert_eq!(32, imm.lane_size_in_bits); - sink.put4( - 0b0_0_1_011110_0000000_010101_00000_00000 - | imm.enc() << 16 - | machreg_to_vec(rn) << 5 - | machreg_to_vec(rd.to_reg()), - ) - } - }, + } &Inst::FpuRRRR { fpu_op, 
rd, @@ -1664,6 +1759,10 @@ impl MachInstEmit for Inst { rm, ra, } => { + let rd = allocs.next_writable(rd); + let rn = allocs.next(rn); + let rm = allocs.next(rm); + let ra = allocs.next(ra); let top17 = match fpu_op { FPUOp3::MAdd32 => 0b000_11111_00_0_00000_0, FPUOp3::MAdd64 => 0b000_11111_01_0_00000_0, @@ -1671,6 +1770,8 @@ impl MachInstEmit for Inst { sink.put4(enc_fpurrrr(top17, rd, rn, rm, ra)); } &Inst::VecMisc { op, rd, rn, size } => { + let rd = allocs.next_writable(rd); + let rn = allocs.next(rn); let (q, enc_size) = size.enc_size(); let (u, bits_12_16, size) = match op { VecMisc2::Not => (0b1, 0b00101, 0b00), @@ -1813,6 +1914,8 @@ impl MachInstEmit for Inst { sink.put4(enc_vec_rr_misc((q << 1) | u, size, bits_12_16, rd, rn)); } &Inst::VecLanes { op, rd, rn, size } => { + let rd = allocs.next_writable(rd); + let rn = allocs.next(rn); let (q, size) = match size { VectorSize::Size8x8 => (0b0, 0b00), VectorSize::Size8x16 => (0b1, 0b00), @@ -1834,6 +1937,8 @@ impl MachInstEmit for Inst { size, imm, } => { + let rd = allocs.next_writable(rd); + let rn = allocs.next(rn); let (is_shr, template) = match op { VecShiftImmOp::Ushr => (true, 0b_011_011110_0000_000_000001_00000_00000_u32), VecShiftImmOp::Sshr => (true, 0b_010_011110_0000_000_000001_00000_00000_u32), @@ -1869,6 +1974,9 @@ impl MachInstEmit for Inst { sink.put4(template | (immh_immb << 16) | (rn_enc << 5) | rd_enc); } &Inst::VecExtract { rd, rn, rm, imm4 } => { + let rd = allocs.next_writable(rd); + let rn = allocs.next(rn); + let rm = allocs.next(rm); if imm4 < 16 { let template = 0b_01_101110_000_00000_0_0000_0_00000_00000_u32; let rm_enc = machreg_to_vec(rm); @@ -1890,6 +1998,9 @@ impl MachInstEmit for Inst { rm, is_extension, } => { + let rn = allocs.next(rn); + let rm = allocs.next(rm); + let rd = allocs.next_writable(rd); sink.put4(enc_tbl(is_extension, 0b00, rd, rn, rm)); } &Inst::VecTbl2 { @@ -1899,16 +2010,26 @@ impl MachInstEmit for Inst { rm, is_extension, } => { + let rn = allocs.next(rn); + let rn2 = allocs.next(rn2); + let rm = allocs.next(rm); + let rd = allocs.next_writable(rd); assert_eq!(machreg_to_vec(rn2), (machreg_to_vec(rn) + 1) % 32); sink.put4(enc_tbl(is_extension, 0b01, rd, rn, rm)); } &Inst::FpuCmp32 { rn, rm } => { + let rn = allocs.next(rn); + let rm = allocs.next(rm); sink.put4(enc_fcmp(ScalarSize::Size32, rn, rm)); } &Inst::FpuCmp64 { rn, rm } => { + let rn = allocs.next(rn); + let rm = allocs.next(rm); sink.put4(enc_fcmp(ScalarSize::Size64, rn, rm)); } &Inst::FpuToInt { op, rd, rn } => { + let rd = allocs.next_writable(rd); + let rn = allocs.next(rn); let top16 = match op { // FCVTZS (32/32-bit) FpuToIntOp::F32ToI32 => 0b000_11110_00_1_11_000, @@ -1930,6 +2051,8 @@ impl MachInstEmit for Inst { sink.put4(enc_fputoint(top16, rd, rn)); } &Inst::IntToFpu { op, rd, rn } => { + let rd = allocs.next_writable(rd); + let rn = allocs.next(rn); let top16 = match op { // SCVTF (32/32-bit) IntToFpuOp::I32ToF32 => 0b000_11110_00_1_00_010, @@ -1951,41 +2074,51 @@ impl MachInstEmit for Inst { sink.put4(enc_inttofpu(top16, rd, rn)); } &Inst::LoadFpuConst64 { rd, const_data } => { + let rd = allocs.next_writable(rd); let inst = Inst::FpuLoad64 { rd, mem: AMode::Label(MemLabel::PCRel(8)), flags: MemFlags::trusted(), }; - inst.emit(sink, emit_info, state); + inst.emit(&[], sink, emit_info, state); let inst = Inst::Jump { dest: BranchTarget::ResolvedOffset(12), }; - inst.emit(sink, emit_info, state); + inst.emit(&[], sink, emit_info, state); sink.put8(const_data); } &Inst::LoadFpuConst128 { rd, const_data } => { + let 
rd = allocs.next_writable(rd); let inst = Inst::FpuLoad128 { rd, mem: AMode::Label(MemLabel::PCRel(8)), flags: MemFlags::trusted(), }; - inst.emit(sink, emit_info, state); + inst.emit(&[], sink, emit_info, state); let inst = Inst::Jump { dest: BranchTarget::ResolvedOffset(20), }; - inst.emit(sink, emit_info, state); + inst.emit(&[], sink, emit_info, state); for i in const_data.to_le_bytes().iter() { sink.put1(*i); } } &Inst::FpuCSel32 { rd, rn, rm, cond } => { + let rd = allocs.next_writable(rd); + let rn = allocs.next(rn); + let rm = allocs.next(rm); sink.put4(enc_fcsel(rd, rn, rm, cond, ScalarSize::Size32)); } &Inst::FpuCSel64 { rd, rn, rm, cond } => { + let rd = allocs.next_writable(rd); + let rn = allocs.next(rn); + let rm = allocs.next(rm); sink.put4(enc_fcsel(rd, rn, rm, cond, ScalarSize::Size64)); } &Inst::FpuRound { op, rd, rn } => { + let rd = allocs.next_writable(rd); + let rn = allocs.next(rn); let top22 = match op { FpuRoundMode::Minus32 => 0b000_11110_00_1_001_010_10000, FpuRoundMode::Minus64 => 0b000_11110_01_1_001_010_10000, @@ -1999,6 +2132,8 @@ impl MachInstEmit for Inst { sink.put4(enc_fround(top22, rd, rn)); } &Inst::MovToFpu { rd, rn, size } => { + let rd = allocs.next_writable(rd); + let rn = allocs.next(rn); let template = match size { ScalarSize::Size32 => 0b000_11110_00_1_00_111_000000_00000_00000, ScalarSize::Size64 => 0b100_11110_01_1_00_111_000000_00000_00000, @@ -2007,6 +2142,7 @@ impl MachInstEmit for Inst { sink.put4(template | (machreg_to_gpr(rn) << 5) | machreg_to_vec(rd.to_reg())); } &Inst::FpuMoveFPImm { rd, imm, size } => { + let rd = allocs.next_writable(rd); let size_code = match size { ScalarSize::Size32 => 0b00, ScalarSize::Size64 => 0b01, @@ -2020,6 +2156,8 @@ impl MachInstEmit for Inst { ); } &Inst::MovToVec { rd, rn, idx, size } => { + let rd = allocs.next_writable(rd); + let rn = allocs.next(rn); let (imm5, shift) = match size.lane_size() { ScalarSize::Size8 => (0b00001, 1), ScalarSize::Size16 => (0b00010, 2), @@ -2037,6 +2175,8 @@ impl MachInstEmit for Inst { ); } &Inst::MovFromVec { rd, rn, idx, size } => { + let rd = allocs.next_writable(rd); + let rn = allocs.next(rn); let (q, imm5, shift, mask) = match size { VectorSize::Size8x16 => (0b0, 0b00001, 1, 0b1111), VectorSize::Size16x8 => (0b0, 0b00010, 2, 0b0111), @@ -2061,6 +2201,8 @@ impl MachInstEmit for Inst { size, scalar_size, } => { + let rd = allocs.next_writable(rd); + let rn = allocs.next(rn); let (imm5, shift, half) = match size { VectorSize::Size8x8 => (0b00001, 1, true), VectorSize::Size8x16 => (0b00001, 1, false), @@ -2087,6 +2229,8 @@ impl MachInstEmit for Inst { ); } &Inst::VecDup { rd, rn, size } => { + let rd = allocs.next_writable(rd); + let rn = allocs.next(rn); let imm5 = match size { VectorSize::Size8x16 => 0b00001, VectorSize::Size16x8 => 0b00010, @@ -2102,6 +2246,8 @@ impl MachInstEmit for Inst { ); } &Inst::VecDupFromFpu { rd, rn, size } => { + let rd = allocs.next_writable(rd); + let rn = allocs.next(rn); let imm5 = match size { VectorSize::Size32x4 => 0b00100, VectorSize::Size64x2 => 0b01000, @@ -2115,6 +2261,7 @@ impl MachInstEmit for Inst { ); } &Inst::VecDupFPImm { rd, imm, size } => { + let rd = allocs.next_writable(rd); let imm = imm.enc_bits(); let op = match size.lane_size() { ScalarSize::Size32 => 0, @@ -2131,6 +2278,7 @@ impl MachInstEmit for Inst { invert, size, } => { + let rd = allocs.next_writable(rd); let (imm, shift, shift_ones) = imm.value(); let (op, cmode) = match size.lane_size() { ScalarSize::Size8 => { @@ -2178,6 +2326,8 @@ impl MachInstEmit for Inst 
{ rn, high_half, } => { + let rd = allocs.next_writable(rd); + let rn = allocs.next(rn); let (u, immh) = match t { VecExtendOp::Sxtl8 => (0b0, 0b001), VecExtendOp::Sxtl16 => (0b0, 0b010), @@ -2201,6 +2351,8 @@ impl MachInstEmit for Inst { rn, high_half, } => { + let rd = allocs.next_writable(rd); + let rn = allocs.next(rn); let (u, size, bits_12_16) = match op { VecRRLongOp::Fcvtl16 => (0b0, 0b00, 0b10111), VecRRLongOp::Fcvtl32 => (0b0, 0b01, 0b10111), @@ -2223,6 +2375,8 @@ impl MachInstEmit for Inst { rn, high_half, } => { + let rn = allocs.next(rn); + let rd = allocs.next_writable(rd); let (u, size, bits_12_16) = match op { VecRRNarrowOp::Xtn16 => (0b0, 0b00, 0b10010), VecRRNarrowOp::Xtn32 => (0b0, 0b01, 0b10010), @@ -2255,6 +2409,8 @@ impl MachInstEmit for Inst { src_idx, size, } => { + let rd = allocs.next_writable(rd); + let rn = allocs.next(rn); let (imm5, shift) = match size.lane_size() { ScalarSize::Size8 => (0b00001, 1), ScalarSize::Size16 => (0b00010, 2), @@ -2276,6 +2432,8 @@ impl MachInstEmit for Inst { ); } &Inst::VecRRPair { op, rd, rn } => { + let rd = allocs.next_writable(rd); + let rn = allocs.next(rn); let bits_12_16 = match op { VecPairOp::Addp => 0b11011, }; @@ -2289,6 +2447,9 @@ impl MachInstEmit for Inst { alu_op, high_half, } => { + let rd = allocs.next_writable(rd); + let rn = allocs.next(rn); + let rm = allocs.next(rm); let (u, size, bit14) = match alu_op { VecRRRLongOp::Smull8 => (0b0, 0b00, 0b1), VecRRRLongOp::Smull16 => (0b0, 0b01, 0b1), @@ -2311,6 +2472,8 @@ impl MachInstEmit for Inst { )); } &Inst::VecRRPairLong { op, rd, rn } => { + let rd = allocs.next_writable(rd); + let rn = allocs.next(rn); let (u, size) = match op { VecRRPairLongOp::Saddlp8 => (0b0, 0b0), VecRRPairLongOp::Uaddlp8 => (0b1, 0b0), @@ -2327,6 +2490,9 @@ impl MachInstEmit for Inst { alu_op, size, } => { + let rd = allocs.next_writable(rd); + let rn = allocs.next(rn); + let rm = allocs.next(rm); let (q, enc_size) = size.enc_size(); let is_float = match alu_op { VecALUOp::Fcmeq @@ -2431,6 +2597,8 @@ impl MachInstEmit for Inst { sink.put4(enc_vec_rrr(top11 | q << 9, rm, bit15_10, rn, rd)); } &Inst::VecLoadReplicate { rd, rn, size } => { + let rd = allocs.next_writable(rd); + let rn = allocs.next(rn); let (q, size) = size.enc_size(); let srcloc = state.cur_srcloc(); @@ -2442,6 +2610,9 @@ impl MachInstEmit for Inst { sink.put4(enc_ldst_vec(q, size, rn, rd)); } &Inst::VecCSel { rd, rn, rm, cond } => { + let rd = allocs.next_writable(rd); + let rn = allocs.next(rn); + let rm = allocs.next(rm); /* Emit this: b.cond else mov rd, rm @@ -2460,6 +2631,7 @@ impl MachInstEmit for Inst { sink.put4(enc_conditional_br( BranchTarget::Label(else_label), CondBrKind::Cond(cond), + &mut AllocationConsumer::default(), )); sink.use_label_at_offset(br_else_offset, else_label, LabelUse::Branch19); @@ -2482,9 +2654,11 @@ impl MachInstEmit for Inst { sink.bind_label(out_label); } &Inst::MovToNZCV { rn } => { + let rn = allocs.next(rn); sink.put4(0xd51b4200 | machreg_to_gpr(rn)); } &Inst::MovFromNZCV { rd } => { + let rd = allocs.next_writable(rd); sink.put4(0xd53b4200 | machreg_to_gpr(rd.to_reg())); } &Inst::Extend { @@ -2494,6 +2668,8 @@ impl MachInstEmit for Inst { from_bits: 1, to_bits, } => { + let rd = allocs.next_writable(rd); + let rn = allocs.next(rn); assert!(to_bits <= 64); // Reduce zero-extend-from-1-bit to: // - and rd, rn, #1 @@ -2507,7 +2683,7 @@ impl MachInstEmit for Inst { rn, imml, } - .emit(sink, emit_info, state); + .emit(&[], sink, emit_info, state); } &Inst::Extend { rd, @@ -2516,8 +2692,10 @@ impl 
MachInstEmit for Inst { from_bits: 32, to_bits: 64, } => { + let rd = allocs.next_writable(rd); + let rn = allocs.next(rn); let mov = Inst::Mov32 { rd, rm: rn }; - mov.emit(sink, emit_info, state); + mov.emit(&[], sink, emit_info, state); } &Inst::Extend { rd, @@ -2526,6 +2704,8 @@ impl MachInstEmit for Inst { from_bits, to_bits, } => { + let rd = allocs.next_writable(rd); + let rn = allocs.next(rn); let (opc, size) = if signed { (0b00, OperandSize::from_bits(to_bits)) } else { @@ -2543,7 +2723,7 @@ impl MachInstEmit for Inst { // Emit the jump itself. sink.put4(enc_jump26(0b000101, dest.as_offset26_or_zero())); } - &Inst::Ret => { + &Inst::Ret { .. } => { sink.put4(0xd65f03c0); } &Inst::EpiloguePlaceholder => { @@ -2564,7 +2744,8 @@ impl MachInstEmit for Inst { if let Some(s) = state.take_stack_map() { sink.add_stack_map(StackMapExtent::UpcomingBytes(4), s); } - sink.put4(0b1101011_0001_11111_000000_00000_00000 | (machreg_to_gpr(info.rn) << 5)); + let rn = allocs.next(info.rn); + sink.put4(0b1101011_0001_11111_000000_00000_00000 | (machreg_to_gpr(rn) << 5)); let loc = state.cur_srcloc(); if info.opcode.is_call() { sink.add_call_site(loc, info.opcode); @@ -2579,10 +2760,12 @@ impl MachInstEmit for Inst { let cond_off = sink.cur_offset(); if let Some(l) = taken.as_label() { sink.use_label_at_offset(cond_off, l, LabelUse::Branch19); - let inverted = enc_conditional_br(taken, kind.invert()).to_le_bytes(); + let mut allocs_inv = allocs.clone(); + let inverted = + enc_conditional_br(taken, kind.invert(), &mut allocs_inv).to_le_bytes(); sink.add_cond_branch(cond_off, cond_off + 4, l, &inverted[..]); } - sink.put4(enc_conditional_br(taken, kind)); + sink.put4(enc_conditional_br(taken, kind, &mut allocs)); // Unconditional part next. let uncond_off = sink.cur_offset(); @@ -2599,15 +2782,17 @@ impl MachInstEmit for Inst { sink.put4(enc_conditional_br( BranchTarget::Label(label), kind.invert(), + &mut allocs, )); sink.use_label_at_offset(off, label, LabelUse::Branch19); // udf let trap = Inst::Udf { trap_code }; - trap.emit(sink, emit_info, state); + trap.emit(&[], sink, emit_info, state); // LABEL: sink.bind_label(label); } &Inst::IndirectBr { rn, .. } => { + let rn = allocs.next(rn); sink.put4(enc_br(rn)); } &Inst::Nop0 => {} @@ -2626,6 +2811,7 @@ impl MachInstEmit for Inst { sink.put4(0xd4a00000); } &Inst::Adr { rd, off } => { + let rd = allocs.next_writable(rd); assert!(off > -(1 << 20)); assert!(off < (1 << 20)); sink.put4(enc_adr(off, rd)); @@ -2643,12 +2829,19 @@ impl MachInstEmit for Inst { ref info, .. } => { + let ridx = allocs.next(ridx); + let rtmp1 = allocs.next_writable(rtmp1); + let rtmp2 = allocs.next_writable(rtmp2); // This sequence is *one* instruction in the vcode, and is expanded only here at // emission time, because we cannot allow the regalloc to insert spills/reloads in // the middle; we depend on hardcoded PC-rel addressing below. // Branch to default when condition code from prior comparison indicates. - let br = enc_conditional_br(info.default_target, CondBrKind::Cond(Cond::Hs)); + let br = enc_conditional_br( + info.default_target, + CondBrKind::Cond(Cond::Hs), + &mut AllocationConsumer::default(), + ); // No need to inform the sink's branch folding logic about this branch, because it // will not be merged with any other branch, flipped, or elided (it is not preceded // or succeeded by any other branch). Just emit it with the label use. 
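+            // (Illustrative summary, not upstream text: the fixed expansion
+            // that follows is
+            //
+            //     b.hs  <default>           ; out-of-bounds index
+            //     mov   rtmp2, ridx
+            //     adr   rtmp1, <table>      ; hardcoded offset of 16
+            //     ldrsw rtmp2, [rtmp1, ...] ; 32-bit offset from the table
+            //     add   rtmp1, rtmp1, rtmp2
+            //     br    rtmp1
+            //     <table of 32-bit offsets>
+            //
+            // The hardcoded PC-relative `adr` is why regalloc must not be
+            // allowed to insert spills or reloads inside this sequence.)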
@@ -2661,10 +2854,10 @@ impl MachInstEmit for Inst { // Save index in a tmp (the live range of ridx only goes to start of this // sequence; rtmp1 or rtmp2 may overwrite it). let inst = Inst::gen_move(rtmp2, ridx, I64); - inst.emit(sink, emit_info, state); + inst.emit(&[], sink, emit_info, state); // Load address of jump table let inst = Inst::Adr { rd: rtmp1, off: 16 }; - inst.emit(sink, emit_info, state); + inst.emit(&[], sink, emit_info, state); // Load value out of jump table let inst = Inst::SLoad32 { rd: rtmp2, @@ -2676,7 +2869,7 @@ impl MachInstEmit for Inst { ), flags: MemFlags::trusted(), }; - inst.emit(sink, emit_info, state); + inst.emit(&[], sink, emit_info, state); // Add base of jump table to jump-table-sourced block offset let inst = Inst::AluRRR { alu_op: ALUOp::Add, @@ -2685,14 +2878,14 @@ impl MachInstEmit for Inst { rn: rtmp1.to_reg(), rm: rtmp2.to_reg(), }; - inst.emit(sink, emit_info, state); + inst.emit(&[], sink, emit_info, state); // Branch to computed address. (`targets` here is only used for successor queries // and is not needed for emission.) let inst = Inst::IndirectBr { rn: rtmp1.to_reg(), targets: vec![], }; - inst.emit(sink, emit_info, state); + inst.emit(&[], sink, emit_info, state); // Emit jump table (table of 32-bit offsets). let jt_off = sink.cur_offset(); for &target in info.targets.iter() { @@ -2719,16 +2912,17 @@ impl MachInstEmit for Inst { ref name, offset, } => { + let rd = allocs.next_writable(rd); let inst = Inst::ULoad64 { rd, mem: AMode::Label(MemLabel::PCRel(8)), flags: MemFlags::trusted(), }; - inst.emit(sink, emit_info, state); + inst.emit(&[], sink, emit_info, state); let inst = Inst::Jump { dest: BranchTarget::ResolvedOffset(12), }; - inst.emit(sink, emit_info, state); + inst.emit(&[], sink, emit_info, state); let srcloc = state.cur_srcloc(); sink.add_reloc(srcloc, Reloc::Abs8, name, offset); if emit_info.0.emit_all_ones_funcaddrs() { @@ -2738,15 +2932,24 @@ impl MachInstEmit for Inst { } } &Inst::LoadAddr { rd, ref mem } => { - let (mem_insts, mem) = mem_finalize(sink.cur_offset(), mem, state); + let rd = allocs.next_writable(rd); + let mem = mem.with_allocs(&mut allocs); + let (mem_insts, mem) = mem_finalize(sink.cur_offset(), &mem, state); for inst in mem_insts.into_iter() { - inst.emit(sink, emit_info, state); + inst.emit(&[], sink, emit_info, state); } let (reg, index_reg, offset) = match mem { - AMode::RegExtended(r, idx, extendop) => (r, Some((idx, extendop)), 0), - AMode::Unscaled(r, simm9) => (r, None, simm9.value()), + AMode::RegExtended(r, idx, extendop) => { + let r = allocs.next(r); + (r, Some((idx, extendop)), 0) + } + AMode::Unscaled(r, simm9) => { + let r = allocs.next(r); + (r, None, simm9.value()) + } AMode::UnsignedOffset(r, uimm12scaled) => { + let r = allocs.next(r); (r, None, uimm12scaled.value() as i32) } _ => panic!("Unsupported case for LoadAddr: {:?}", mem), @@ -2768,12 +2971,12 @@ impl MachInstEmit for Inst { extendop, }; - add.emit(sink, emit_info, state); + add.emit(&[], sink, emit_info, state); } else if offset == 0 { if reg != rd.to_reg() { let mov = Inst::Mov64 { rd, rm: reg }; - mov.emit(sink, emit_info, state); + mov.emit(&[], sink, emit_info, state); } } else if let Some(imm12) = Imm12::maybe_from_u64(abs_offset) { let add = Inst::AluRRImm12 { @@ -2783,7 +2986,7 @@ impl MachInstEmit for Inst { rn: reg, imm12, }; - add.emit(sink, emit_info, state); + add.emit(&[], sink, emit_info, state); } else { // Use `tmp2` here: `reg` may be `spilltmp` if the `AMode` on this instruction // was initially an `SPOffset`. 
Assert that `tmp2` is truly free to use. Note @@ -2794,7 +2997,7 @@ impl MachInstEmit for Inst { debug_assert!(reg != tmp2_reg()); let tmp = writable_tmp2_reg(); for insn in Inst::load_constant(tmp, abs_offset).into_iter() { - insn.emit(sink, emit_info, state); + insn.emit(&[], sink, emit_info, state); } let add = Inst::AluRRR { alu_op, @@ -2803,7 +3006,7 @@ impl MachInstEmit for Inst { rn: reg, rm: tmp.to_reg(), }; - add.emit(sink, emit_info, state); + add.emit(&[], sink, emit_info, state); } } &Inst::VirtualSPOffsetAdj { offset } => { @@ -2820,7 +3023,7 @@ impl MachInstEmit for Inst { let jmp = Inst::Jump { dest: BranchTarget::Label(jump_around_label), }; - jmp.emit(sink, emit_info, state); + jmp.emit(&[], sink, emit_info, state); sink.emit_island(needed_space + 4); sink.bind_label(jump_around_label); } @@ -2851,13 +3054,11 @@ impl MachInstEmit for Inst { sink.put4(0xd503201f); } - &Inst::ValueLabelMarker { .. } => { - // Nothing; this is only used to compute debug info. - } - &Inst::Unwind { ref inst } => { sink.add_unwind(inst.clone()); } + + &Inst::DummyUse { .. } => {} } let end_off = sink.cur_offset(); @@ -2866,7 +3067,8 @@ impl MachInstEmit for Inst { state.clear_post_insn(); } - fn pretty_print(&self, mb_rru: Option<&RealRegUniverse>, state: &mut EmitState) -> String { - self.print_with_state(mb_rru, state) + fn pretty_print_inst(&self, allocs: &[Allocation], state: &mut Self::State) -> String { + let mut allocs = AllocationConsumer::new(allocs); + self.print_with_state(state, &mut allocs) } } diff --git a/cranelift/codegen/src/isa/aarch64/inst/emit_tests.rs b/cranelift/codegen/src/isa/aarch64/inst/emit_tests.rs index 9cc917192c..918ca3ffaf 100644 --- a/cranelift/codegen/src/isa/aarch64/inst/emit_tests.rs +++ b/cranelift/codegen/src/isa/aarch64/inst/emit_tests.rs @@ -27,7 +27,7 @@ fn test_aarch64_binemit() { // Then: // // $ echo "mov x1, x2" | aarch64inst.sh - insns.push((Inst::Ret, "C0035FD6", "ret")); + insns.push((Inst::Ret { rets: vec![] }, "C0035FD6", "ret")); insns.push((Inst::Nop0, "", "nop-zero-len")); insns.push((Inst::Nop4, "1F2003D5", "nop")); insns.push(( @@ -1631,7 +1631,7 @@ fn test_aarch64_binemit() { flags: MemFlags::trusted(), }, "E18040F8", - "ldur x1, [x7, #8]", + "ldr x1, [x7, #8]", )); insns.push(( @@ -6794,7 +6794,6 @@ fn test_aarch64_binemit() { insns.push((Inst::Fence {}, "BF3B03D5", "dmb ish")); let flags = settings::Flags::new(settings::builder()); - let rru = create_reg_universe(&flags); let emit_info = EmitInfo::new(flags); for (insn, expected_encoding, expected_printing) in insns { println!( @@ -6803,11 +6802,12 @@ fn test_aarch64_binemit() { ); // Check the printed text is as expected. 
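+    // (Illustrative note, not upstream text: these unit tests run before any
+    // register allocation, so the `Allocation` slice is empty --
+    // `AllocationConsumer::new(&[])` for printing and `insn.emit(&[], ...)`
+    // for encoding -- and each instruction's registers are used exactly as
+    // written in the test.)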
- let actual_printing = insn.show_rru(Some(&rru)); + let actual_printing = + insn.print_with_state(&mut EmitState::default(), &mut AllocationConsumer::new(&[])); assert_eq!(expected_printing, actual_printing); let mut buffer = MachBuffer::new(); - insn.emit(&mut buffer, &emit_info, &mut Default::default()); + insn.emit(&[], &mut buffer, &emit_info, &mut Default::default()); let buffer = buffer.finish(); let actual_encoding = &buffer.stringify_code_bytes(); assert_eq!(expected_encoding, actual_encoding); diff --git a/cranelift/codegen/src/isa/aarch64/inst/imms.rs b/cranelift/codegen/src/isa/aarch64/inst/imms.rs index beed2f40de..c6a6fed300 100644 --- a/cranelift/codegen/src/isa/aarch64/inst/imms.rs +++ b/cranelift/codegen/src/isa/aarch64/inst/imms.rs @@ -5,8 +5,7 @@ use crate::ir::types::*; use crate::ir::Type; use crate::isa::aarch64::inst::{OperandSize, ScalarSize}; - -use regalloc::{PrettyPrint, RealRegUniverse}; +use crate::machinst::{AllocationConsumer, PrettyPrint}; use core::convert::TryFrom; use std::string::String; @@ -871,7 +870,7 @@ impl ASIMDFPModImm { } impl PrettyPrint for NZCV { - fn show_rru(&self, _mb_rru: Option<&RealRegUniverse>) -> String { + fn pretty_print(&self, _: u8, _: &mut AllocationConsumer<'_>) -> String { let fmt = |c: char, v| if v { c.to_ascii_uppercase() } else { c }; format!( "#{}{}{}{}", @@ -884,13 +883,13 @@ impl PrettyPrint for NZCV { } impl PrettyPrint for UImm5 { - fn show_rru(&self, _mb_rru: Option<&RealRegUniverse>) -> String { + fn pretty_print(&self, _: u8, _: &mut AllocationConsumer<'_>) -> String { format!("#{}", self.value) } } impl PrettyPrint for Imm12 { - fn show_rru(&self, _mb_rru: Option<&RealRegUniverse>) -> String { + fn pretty_print(&self, _: u8, _: &mut AllocationConsumer<'_>) -> String { let shift = if self.shift12 { 12 } else { 0 }; let value = u32::from(self.bits) << shift; format!("#{}", value) @@ -898,49 +897,49 @@ impl PrettyPrint for Imm12 { } impl PrettyPrint for SImm7Scaled { - fn show_rru(&self, _mb_rru: Option<&RealRegUniverse>) -> String { + fn pretty_print(&self, _: u8, _: &mut AllocationConsumer<'_>) -> String { format!("#{}", self.value) } } impl PrettyPrint for FPULeftShiftImm { - fn show_rru(&self, _mb_rru: Option<&RealRegUniverse>) -> String { + fn pretty_print(&self, _: u8, _: &mut AllocationConsumer<'_>) -> String { format!("#{}", self.amount) } } impl PrettyPrint for FPURightShiftImm { - fn show_rru(&self, _mb_rru: Option<&RealRegUniverse>) -> String { + fn pretty_print(&self, _: u8, _: &mut AllocationConsumer<'_>) -> String { format!("#{}", self.amount) } } impl PrettyPrint for SImm9 { - fn show_rru(&self, _mb_rru: Option<&RealRegUniverse>) -> String { + fn pretty_print(&self, _: u8, _: &mut AllocationConsumer<'_>) -> String { format!("#{}", self.value) } } impl PrettyPrint for UImm12Scaled { - fn show_rru(&self, _mb_rru: Option<&RealRegUniverse>) -> String { + fn pretty_print(&self, _: u8, _: &mut AllocationConsumer<'_>) -> String { format!("#{}", self.value) } } impl PrettyPrint for ImmLogic { - fn show_rru(&self, _mb_rru: Option<&RealRegUniverse>) -> String { + fn pretty_print(&self, _: u8, _: &mut AllocationConsumer<'_>) -> String { format!("#{}", self.value()) } } impl PrettyPrint for ImmShift { - fn show_rru(&self, _mb_rru: Option<&RealRegUniverse>) -> String { + fn pretty_print(&self, _: u8, _: &mut AllocationConsumer<'_>) -> String { format!("#{}", self.imm) } } impl PrettyPrint for MoveWideConst { - fn show_rru(&self, _mb_rru: Option<&RealRegUniverse>) -> String { + fn pretty_print(&self, _: u8, _: &mut 
AllocationConsumer<'_>) -> String { if self.shift == 0 { format!("#{}", self.bits) } else { @@ -950,7 +949,7 @@ impl PrettyPrint for MoveWideConst { } impl PrettyPrint for ASIMDMovModImm { - fn show_rru(&self, _mb_rru: Option<&RealRegUniverse>) -> String { + fn pretty_print(&self, _: u8, _: &mut AllocationConsumer<'_>) -> String { if self.is_64bit { debug_assert_eq!(self.shift, 0); @@ -974,7 +973,7 @@ impl PrettyPrint for ASIMDMovModImm { } impl PrettyPrint for ASIMDFPModImm { - fn show_rru(&self, _mb_rru: Option<&RealRegUniverse>) -> String { + fn pretty_print(&self, _: u8, _: &mut AllocationConsumer<'_>) -> String { if self.is_64bit { format!("#{}", f64::from_bits(Self::value64(self.imm))) } else { diff --git a/cranelift/codegen/src/isa/aarch64/inst/mod.rs b/cranelift/codegen/src/isa/aarch64/inst/mod.rs index a0af36d6cb..18d70e527d 100644 --- a/cranelift/codegen/src/isa/aarch64/inst/mod.rs +++ b/cranelift/codegen/src/isa/aarch64/inst/mod.rs @@ -7,16 +7,16 @@ use crate::binemit::{Addend, CodeOffset, Reloc}; use crate::ir::types::{ B1, B128, B16, B32, B64, B8, F32, F64, FFLAGS, I128, I16, I32, I64, I8, I8X16, IFLAGS, R32, R64, }; -use crate::ir::{ExternalName, MemFlags, Opcode, SourceLoc, Type, ValueLabel}; +use crate::ir::{types, ExternalName, MemFlags, Opcode, SourceLoc, Type}; use crate::isa::CallConv; use crate::machinst::*; use crate::{settings, CodegenError, CodegenResult}; -use regalloc::RegUsageCollector; -use regalloc::{PrettyPrint, RealRegUniverse, Reg, RegClass, SpillSlot, VirtualReg, Writable}; +use crate::machinst::{PrettyPrint, Reg, RegClass, Writable}; use alloc::vec::Vec; use core::convert::TryFrom; +use regalloc2::VReg; use smallvec::{smallvec, SmallVec}; use std::string::{String, ToString}; @@ -531,83 +531,84 @@ impl Inst { //============================================================================= // Instructions: get_regs -fn memarg_regs(memarg: &AMode, collector: &mut RegUsageCollector) { +fn memarg_operands<F: Fn(VReg) -> VReg>(memarg: &AMode, collector: &mut OperandCollector<'_, F>) { + // This should match `AMode::with_allocs()`. match memarg { &AMode::Unscaled(reg, ..) | &AMode::UnsignedOffset(reg, ..) => { - collector.add_use(reg); + collector.reg_use(reg); } &AMode::RegReg(r1, r2, ..) | &AMode::RegScaled(r1, r2, ..) | &AMode::RegScaledExtended(r1, r2, ..) | &AMode::RegExtended(r1, r2, ..) => { - collector.add_use(r1); - collector.add_use(r2); + collector.reg_use(r1); + collector.reg_use(r2); } &AMode::Label(..) => {} &AMode::PreIndexed(reg, ..) | &AMode::PostIndexed(reg, ..) => { - collector.add_mod(reg); - } - &AMode::FPOffset(..) => { - collector.add_use(fp_reg()); - } - &AMode::SPOffset(..) | &AMode::NominalSPOffset(..) => { - collector.add_use(stack_reg()); + collector.reg_mod(reg); } + &AMode::FPOffset(..) => {} + &AMode::SPOffset(..) | &AMode::NominalSPOffset(..) => {} &AMode::RegOffset(r, ..) => { - collector.add_use(r); + collector.reg_use(r); } } } -fn pairmemarg_regs(pairmemarg: &PairAMode, collector: &mut RegUsageCollector) { +fn pairmemarg_operands<F: Fn(VReg) -> VReg>( + pairmemarg: &PairAMode, + collector: &mut OperandCollector<'_, F>, +) { + // This should match `PairAMode::with_allocs()`. match pairmemarg { &PairAMode::SignedOffset(reg, ..) => { - collector.add_use(reg); + collector.reg_use(reg); } &PairAMode::PreIndexed(reg, ..) | &PairAMode::PostIndexed(reg, ..)
=> { - collector.add_mod(reg); + collector.reg_mod(reg); } } } -fn aarch64_get_regs(inst: &Inst, collector: &mut RegUsageCollector) { +fn aarch64_get_operands<F: Fn(VReg) -> VReg>(inst: &Inst, collector: &mut OperandCollector<'_, F>) { match inst { &Inst::AluRRR { rd, rn, rm, .. } => { - collector.add_def(rd); - collector.add_use(rn); - collector.add_use(rm); + collector.reg_def(rd); + collector.reg_use(rn); + collector.reg_use(rm); } &Inst::AluRRRR { rd, rn, rm, ra, .. } => { - collector.add_def(rd); - collector.add_use(rn); - collector.add_use(rm); - collector.add_use(ra); + collector.reg_def(rd); + collector.reg_use(rn); + collector.reg_use(rm); + collector.reg_use(ra); } &Inst::AluRRImm12 { rd, rn, .. } => { - collector.add_def(rd); - collector.add_use(rn); + collector.reg_def(rd); + collector.reg_use(rn); } &Inst::AluRRImmLogic { rd, rn, .. } => { - collector.add_def(rd); - collector.add_use(rn); + collector.reg_def(rd); + collector.reg_use(rn); } &Inst::AluRRImmShift { rd, rn, .. } => { - collector.add_def(rd); - collector.add_use(rn); + collector.reg_def(rd); + collector.reg_use(rn); } &Inst::AluRRRShift { rd, rn, rm, .. } => { - collector.add_def(rd); - collector.add_use(rn); - collector.add_use(rm); + collector.reg_def(rd); + collector.reg_use(rn); + collector.reg_use(rm); } &Inst::AluRRRExtend { rd, rn, rm, .. } => { - collector.add_def(rd); - collector.add_use(rn); - collector.add_use(rm); + collector.reg_def(rd); + collector.reg_use(rn); + collector.reg_use(rm); } &Inst::BitRR { rd, rn, .. } => { - collector.add_def(rd); - collector.add_use(rn); + collector.reg_def(rd); + collector.reg_use(rn); } &Inst::ULoad8 { rd, ref mem, .. } | &Inst::SLoad8 { rd, ref mem, .. } @@ -616,145 +617,143 @@ fn aarch64_get_regs(inst: &Inst, collector: &mut RegUsageCollector) { | &Inst::ULoad32 { rd, ref mem, .. } | &Inst::SLoad32 { rd, ref mem, .. } | &Inst::ULoad64 { rd, ref mem, .. } => { - collector.add_def(rd); - memarg_regs(mem, collector); + collector.reg_def(rd); + memarg_operands(mem, collector); } &Inst::Store8 { rd, ref mem, .. } | &Inst::Store16 { rd, ref mem, .. } | &Inst::Store32 { rd, ref mem, .. } | &Inst::Store64 { rd, ref mem, .. } => { - collector.add_use(rd); - memarg_regs(mem, collector); + collector.reg_use(rd); + memarg_operands(mem, collector); } &Inst::StoreP64 { rt, rt2, ref mem, .. } => { - collector.add_use(rt); - collector.add_use(rt2); - pairmemarg_regs(mem, collector); + collector.reg_use(rt); + collector.reg_use(rt2); + pairmemarg_operands(mem, collector); } &Inst::LoadP64 { rt, rt2, ref mem, .. } => { - collector.add_def(rt); - collector.add_def(rt2); - pairmemarg_regs(mem, collector); + collector.reg_def(rt); + collector.reg_def(rt2); + pairmemarg_operands(mem, collector); } &Inst::Mov64 { rd, rm } => { - collector.add_def(rd); - collector.add_use(rm); + collector.reg_def(rd); + collector.reg_use(rm); } &Inst::Mov32 { rd, rm } => { - collector.add_def(rd); - collector.add_use(rm); + collector.reg_def(rd); + collector.reg_use(rm); } &Inst::MovZ { rd, .. } | &Inst::MovN { rd, .. } => { - collector.add_def(rd); + collector.reg_def(rd); } &Inst::MovK { rd, .. } => { - collector.add_mod(rd); + collector.reg_mod(rd); } &Inst::CSel { rd, rn, rm, .. } => { - collector.add_def(rd); - collector.add_use(rn); - collector.add_use(rm); + collector.reg_def(rd); + collector.reg_use(rn); + collector.reg_use(rm); } &Inst::CSet { rd, .. } | &Inst::CSetm { rd, .. } => { - collector.add_def(rd); + collector.reg_def(rd); } &Inst::CCmpImm { rn, ..
} => { - collector.add_use(rn); + collector.reg_use(rn); } - &Inst::AtomicRMWLoop { op, .. } => { - collector.add_use(xreg(25)); - collector.add_use(xreg(26)); - collector.add_def(writable_xreg(24)); - collector.add_def(writable_xreg(27)); - if op != AtomicRmwOp::Xchg { - collector.add_def(writable_xreg(28)); - } + &Inst::AtomicRMWLoop { .. } => { + collector.reg_use(xreg(25)); + collector.reg_use(xreg(26)); + collector.reg_def(writable_xreg(24)); + collector.reg_def(writable_xreg(27)); + collector.reg_def(writable_xreg(28)); } &Inst::AtomicRMW { rs, rt, rn, .. } => { - collector.add_use(rs); - collector.add_def(rt); - collector.add_use(rn); + collector.reg_use(rs); + collector.reg_def(rt); + collector.reg_use(rn); } &Inst::AtomicCAS { rs, rt, rn, .. } => { - collector.add_mod(rs); - collector.add_use(rt); - collector.add_use(rn); + collector.reg_mod(rs); + collector.reg_use(rt); + collector.reg_use(rn); } &Inst::AtomicCASLoop { .. } => { - collector.add_use(xreg(25)); - collector.add_use(xreg(26)); - collector.add_use(xreg(28)); - collector.add_def(writable_xreg(24)); - collector.add_def(writable_xreg(27)); + collector.reg_use(xreg(25)); + collector.reg_use(xreg(26)); + collector.reg_use(xreg(28)); + collector.reg_def(writable_xreg(24)); + collector.reg_def(writable_xreg(27)); } &Inst::LoadAcquire { rt, rn, .. } => { - collector.add_use(rn); - collector.add_def(rt); + collector.reg_use(rn); + collector.reg_def(rt); } &Inst::StoreRelease { rt, rn, .. } => { - collector.add_use(rn); - collector.add_use(rt); + collector.reg_use(rn); + collector.reg_use(rt); } &Inst::Fence {} => {} &Inst::FpuMove64 { rd, rn } => { - collector.add_def(rd); - collector.add_use(rn); + collector.reg_def(rd); + collector.reg_use(rn); } &Inst::FpuMove128 { rd, rn } => { - collector.add_def(rd); - collector.add_use(rn); + collector.reg_def(rd); + collector.reg_use(rn); } &Inst::FpuMoveFromVec { rd, rn, .. } => { - collector.add_def(rd); - collector.add_use(rn); + collector.reg_def(rd); + collector.reg_use(rn); } &Inst::FpuExtend { rd, rn, .. } => { - collector.add_def(rd); - collector.add_use(rn); + collector.reg_def(rd); + collector.reg_use(rn); } &Inst::FpuRR { rd, rn, .. } => { - collector.add_def(rd); - collector.add_use(rn); + collector.reg_def(rd); + collector.reg_use(rn); } &Inst::FpuRRR { rd, rn, rm, .. } => { - collector.add_def(rd); - collector.add_use(rn); - collector.add_use(rm); + collector.reg_def(rd); + collector.reg_use(rn); + collector.reg_use(rm); } &Inst::FpuRRI { fpu_op, rd, rn, .. } => { match fpu_op { - FPUOpRI::UShr32(..) | FPUOpRI::UShr64(..) => collector.add_def(rd), - FPUOpRI::Sli32(..) | FPUOpRI::Sli64(..) => collector.add_mod(rd), + FPUOpRI::UShr32(..) | FPUOpRI::UShr64(..) => collector.reg_def(rd), + FPUOpRI::Sli32(..) | FPUOpRI::Sli64(..) => collector.reg_mod(rd), } - collector.add_use(rn); + collector.reg_use(rn); } &Inst::FpuRRRR { rd, rn, rm, ra, .. } => { - collector.add_def(rd); - collector.add_use(rn); - collector.add_use(rm); - collector.add_use(ra); + collector.reg_def(rd); + collector.reg_use(rn); + collector.reg_use(rm); + collector.reg_use(ra); } &Inst::VecMisc { rd, rn, .. } => { - collector.add_def(rd); - collector.add_use(rn); + collector.reg_def(rd); + collector.reg_use(rn); } &Inst::VecLanes { rd, rn, .. } => { - collector.add_def(rd); - collector.add_use(rn); + collector.reg_def(rd); + collector.reg_use(rn); } &Inst::VecShiftImm { rd, rn, .. 
} => { - collector.add_def(rd); - collector.add_use(rn); + collector.reg_def(rd); + collector.reg_use(rn); } &Inst::VecExtract { rd, rn, rm, .. } => { - collector.add_def(rd); - collector.add_use(rn); - collector.add_use(rm); + collector.reg_def(rd); + collector.reg_use(rn); + collector.reg_use(rm); } &Inst::VecTbl { rd, @@ -762,13 +761,13 @@ fn aarch64_get_regs(inst: &Inst, collector: &mut RegUsageCollector) { rm, is_extension, } => { - collector.add_use(rn); - collector.add_use(rm); + collector.reg_use(rn); + collector.reg_use(rm); if is_extension { - collector.add_mod(rd); + collector.reg_mod(rd); } else { - collector.add_def(rd); + collector.reg_def(rd); } } &Inst::VecTbl2 { @@ -778,1089 +777,258 @@ fn aarch64_get_regs(inst: &Inst, collector: &mut RegUsageCollector) { rm, is_extension, } => { - collector.add_use(rn); - collector.add_use(rn2); - collector.add_use(rm); + collector.reg_use(rn); + collector.reg_use(rn2); + collector.reg_use(rm); if is_extension { - collector.add_mod(rd); + collector.reg_mod(rd); } else { - collector.add_def(rd); + collector.reg_def(rd); } } &Inst::VecLoadReplicate { rd, rn, .. } => { - collector.add_def(rd); - collector.add_use(rn); + collector.reg_def(rd); + collector.reg_use(rn); } &Inst::VecCSel { rd, rn, rm, .. } => { - collector.add_def(rd); - collector.add_use(rn); - collector.add_use(rm); + collector.reg_def(rd); + collector.reg_use(rn); + collector.reg_use(rm); } &Inst::FpuCmp32 { rn, rm } | &Inst::FpuCmp64 { rn, rm } => { - collector.add_use(rn); - collector.add_use(rm); + collector.reg_use(rn); + collector.reg_use(rm); } &Inst::FpuLoad32 { rd, ref mem, .. } => { - collector.add_def(rd); - memarg_regs(mem, collector); + collector.reg_def(rd); + memarg_operands(mem, collector); } &Inst::FpuLoad64 { rd, ref mem, .. } => { - collector.add_def(rd); - memarg_regs(mem, collector); + collector.reg_def(rd); + memarg_operands(mem, collector); } &Inst::FpuLoad128 { rd, ref mem, .. } => { - collector.add_def(rd); - memarg_regs(mem, collector); + collector.reg_def(rd); + memarg_operands(mem, collector); } &Inst::FpuStore32 { rd, ref mem, .. } => { - collector.add_use(rd); - memarg_regs(mem, collector); + collector.reg_use(rd); + memarg_operands(mem, collector); } &Inst::FpuStore64 { rd, ref mem, .. } => { - collector.add_use(rd); - memarg_regs(mem, collector); + collector.reg_use(rd); + memarg_operands(mem, collector); } &Inst::FpuStore128 { rd, ref mem, .. } => { - collector.add_use(rd); - memarg_regs(mem, collector); + collector.reg_use(rd); + memarg_operands(mem, collector); } &Inst::FpuLoadP64 { rt, rt2, ref mem, .. } => { - collector.add_def(rt); - collector.add_def(rt2); - pairmemarg_regs(mem, collector); + collector.reg_def(rt); + collector.reg_def(rt2); + pairmemarg_operands(mem, collector); } &Inst::FpuStoreP64 { rt, rt2, ref mem, .. } => { - collector.add_use(rt); - collector.add_use(rt2); - pairmemarg_regs(mem, collector); + collector.reg_use(rt); + collector.reg_use(rt2); + pairmemarg_operands(mem, collector); } &Inst::FpuLoadP128 { rt, rt2, ref mem, .. } => { - collector.add_def(rt); - collector.add_def(rt2); - pairmemarg_regs(mem, collector); + collector.reg_def(rt); + collector.reg_def(rt2); + pairmemarg_operands(mem, collector); } &Inst::FpuStoreP128 { rt, rt2, ref mem, .. } => { - collector.add_use(rt); - collector.add_use(rt2); - pairmemarg_regs(mem, collector); + collector.reg_use(rt); + collector.reg_use(rt2); + pairmemarg_operands(mem, collector); } &Inst::LoadFpuConst64 { rd, .. } | &Inst::LoadFpuConst128 { rd, .. 
} => { - collector.add_def(rd); + collector.reg_def(rd); } &Inst::FpuToInt { rd, rn, .. } => { - collector.add_def(rd); - collector.add_use(rn); + collector.reg_def(rd); + collector.reg_use(rn); } &Inst::IntToFpu { rd, rn, .. } => { - collector.add_def(rd); - collector.add_use(rn); + collector.reg_def(rd); + collector.reg_use(rn); } &Inst::FpuCSel32 { rd, rn, rm, .. } | &Inst::FpuCSel64 { rd, rn, rm, .. } => { - collector.add_def(rd); - collector.add_use(rn); - collector.add_use(rm); + collector.reg_def(rd); + collector.reg_use(rn); + collector.reg_use(rm); } &Inst::FpuRound { rd, rn, .. } => { - collector.add_def(rd); - collector.add_use(rn); + collector.reg_def(rd); + collector.reg_use(rn); } &Inst::MovToFpu { rd, rn, .. } => { - collector.add_def(rd); - collector.add_use(rn); + collector.reg_def(rd); + collector.reg_use(rn); } &Inst::FpuMoveFPImm { rd, .. } => { - collector.add_def(rd); + collector.reg_def(rd); } &Inst::MovToVec { rd, rn, .. } => { - collector.add_mod(rd); - collector.add_use(rn); + collector.reg_mod(rd); + collector.reg_use(rn); } &Inst::MovFromVec { rd, rn, .. } | &Inst::MovFromVecSigned { rd, rn, .. } => { - collector.add_def(rd); - collector.add_use(rn); + collector.reg_def(rd); + collector.reg_use(rn); } &Inst::VecDup { rd, rn, .. } => { - collector.add_def(rd); - collector.add_use(rn); + collector.reg_def(rd); + collector.reg_use(rn); } &Inst::VecDupFromFpu { rd, rn, .. } => { - collector.add_def(rd); - collector.add_use(rn); + collector.reg_def(rd); + collector.reg_use(rn); } &Inst::VecDupFPImm { rd, .. } => { - collector.add_def(rd); + collector.reg_def(rd); } &Inst::VecDupImm { rd, .. } => { - collector.add_def(rd); + collector.reg_def(rd); } &Inst::VecExtend { rd, rn, .. } => { - collector.add_def(rd); - collector.add_use(rn); + collector.reg_def(rd); + collector.reg_use(rn); } &Inst::VecMovElement { rd, rn, .. } => { - collector.add_mod(rd); - collector.add_use(rn); + collector.reg_mod(rd); + collector.reg_use(rn); } &Inst::VecRRLong { rd, rn, .. } => { - collector.add_def(rd); - collector.add_use(rn); + collector.reg_def(rd); + collector.reg_use(rn); } &Inst::VecRRNarrow { rd, rn, high_half, .. } => { - collector.add_use(rn); + collector.reg_use(rn); if high_half { - collector.add_mod(rd); + collector.reg_mod(rd); } else { - collector.add_def(rd); + collector.reg_def(rd); } } &Inst::VecRRPair { rd, rn, .. } => { - collector.add_def(rd); - collector.add_use(rn); + collector.reg_def(rd); + collector.reg_use(rn); } &Inst::VecRRRLong { alu_op, rd, rn, rm, .. } => { match alu_op { VecRRRLongOp::Umlal8 | VecRRRLongOp::Umlal16 | VecRRRLongOp::Umlal32 => { - collector.add_mod(rd) + collector.reg_mod(rd) } - _ => collector.add_def(rd), + _ => collector.reg_def(rd), }; - collector.add_use(rn); - collector.add_use(rm); + collector.reg_use(rn); + collector.reg_use(rm); } &Inst::VecRRPairLong { rd, rn, .. } => { - collector.add_def(rd); - collector.add_use(rn); + collector.reg_def(rd); + collector.reg_use(rn); } &Inst::VecRRR { alu_op, rd, rn, rm, .. } => { if alu_op == VecALUOp::Bsl { - collector.add_mod(rd); + collector.reg_mod(rd); } else { - collector.add_def(rd); + collector.reg_def(rd); } - collector.add_use(rn); - collector.add_use(rm); + collector.reg_use(rn); + collector.reg_use(rm); } &Inst::MovToNZCV { rn } => { - collector.add_use(rn); + collector.reg_use(rn); } &Inst::MovFromNZCV { rd } => { - collector.add_def(rd); + collector.reg_def(rd); } &Inst::Extend { rd, rn, .. 
} => { - collector.add_def(rd); - collector.add_use(rn); + collector.reg_def(rd); + collector.reg_use(rn); } - &Inst::Jump { .. } | &Inst::Ret | &Inst::EpiloguePlaceholder => {} + &Inst::Ret { ref rets } => { + for &ret in rets { + collector.reg_use(ret); + } + } + &Inst::Jump { .. } | &Inst::EpiloguePlaceholder => {} &Inst::Call { ref info, .. } => { - collector.add_uses(&*info.uses); - collector.add_defs(&*info.defs); + collector.reg_uses(&info.uses[..]); + collector.reg_defs(&info.defs[..]); } &Inst::CallInd { ref info, .. } => { - collector.add_uses(&*info.uses); - collector.add_defs(&*info.defs); - collector.add_use(info.rn); + collector.reg_use(info.rn); + collector.reg_uses(&info.uses[..]); + collector.reg_defs(&info.defs[..]); } &Inst::CondBr { ref kind, .. } => match kind { CondBrKind::Zero(rt) | CondBrKind::NotZero(rt) => { - collector.add_use(*rt); + collector.reg_use(*rt); } CondBrKind::Cond(_) => {} }, &Inst::IndirectBr { rn, .. } => { - collector.add_use(rn); + collector.reg_use(rn); } &Inst::Nop0 | Inst::Nop4 => {} &Inst::Brk => {} &Inst::Udf { .. } => {} &Inst::TrapIf { ref kind, .. } => match kind { CondBrKind::Zero(rt) | CondBrKind::NotZero(rt) => { - collector.add_use(*rt); + collector.reg_use(*rt); } CondBrKind::Cond(_) => {} }, &Inst::Adr { rd, .. } => { - collector.add_def(rd); + collector.reg_def(rd); } &Inst::Word4 { .. } | &Inst::Word8 { .. } => {} &Inst::JTSequence { ridx, rtmp1, rtmp2, .. } => { - collector.add_use(ridx); - collector.add_def(rtmp1); - collector.add_def(rtmp2); + collector.reg_use(ridx); + collector.reg_early_def(rtmp1); + collector.reg_early_def(rtmp2); } &Inst::LoadExtName { rd, .. } => { - collector.add_def(rd); + collector.reg_def(rd); } &Inst::LoadAddr { rd, ref mem } => { - collector.add_def(rd); - memarg_regs(mem, collector); + collector.reg_def(rd); + memarg_operands(mem, collector); } &Inst::VirtualSPOffsetAdj { .. } => {} - &Inst::ValueLabelMarker { reg, .. } => { - collector.add_use(reg); - } &Inst::ElfTlsGetAddr { .. } => { for reg in AArch64MachineDeps::get_regs_clobbered_by_call(CallConv::SystemV) { - collector.add_def(reg); + collector.reg_def(reg); } } &Inst::Unwind { .. } => {} &Inst::EmitIsland { .. } => {} - } -} - -//============================================================================= -// Instructions: map_regs - -pub fn aarch64_map_regs(inst: &mut Inst, mapper: &RM) { - fn map_mem(m: &RM, mem: &mut AMode) { - // N.B.: we take only the pre-map here, but this is OK because the - // only addressing modes that update registers (pre/post-increment on - // AArch64) both read and write registers, so they are "mods" rather - // than "defs", so must be the same in both the pre- and post-map. - match mem { - &mut AMode::Unscaled(ref mut reg, ..) => m.map_use(reg), - &mut AMode::UnsignedOffset(ref mut reg, ..) => m.map_use(reg), - &mut AMode::RegReg(ref mut r1, ref mut r2) - | &mut AMode::RegScaled(ref mut r1, ref mut r2, ..) - | &mut AMode::RegScaledExtended(ref mut r1, ref mut r2, ..) - | &mut AMode::RegExtended(ref mut r1, ref mut r2, ..) => { - m.map_use(r1); - m.map_use(r2); - } - &mut AMode::Label(..) => {} - &mut AMode::PreIndexed(ref mut r, ..) => m.map_mod(r), - &mut AMode::PostIndexed(ref mut r, ..) => m.map_mod(r), - &mut AMode::FPOffset(..) - | &mut AMode::SPOffset(..) - | &mut AMode::NominalSPOffset(..) => {} - &mut AMode::RegOffset(ref mut r, ..) => m.map_use(r), - }; - } - - fn map_pairmem(m: &RM, mem: &mut PairAMode) { - match mem { - &mut PairAMode::SignedOffset(ref mut reg, ..) 
=> m.map_use(reg), - &mut PairAMode::PreIndexed(ref mut reg, ..) => m.map_def(reg), - &mut PairAMode::PostIndexed(ref mut reg, ..) => m.map_def(reg), + &Inst::DummyUse { reg } => { + collector.reg_use(reg); } } - - fn map_br(m: &RM, br: &mut CondBrKind) { - match br { - &mut CondBrKind::Zero(ref mut reg) => m.map_use(reg), - &mut CondBrKind::NotZero(ref mut reg) => m.map_use(reg), - &mut CondBrKind::Cond(..) => {} - }; - } - - match inst { - &mut Inst::AluRRR { - ref mut rd, - ref mut rn, - ref mut rm, - .. - } => { - mapper.map_def(rd); - mapper.map_use(rn); - mapper.map_use(rm); - } - &mut Inst::AluRRRR { - ref mut rd, - ref mut rn, - ref mut rm, - ref mut ra, - .. - } => { - mapper.map_def(rd); - mapper.map_use(rn); - mapper.map_use(rm); - mapper.map_use(ra); - } - &mut Inst::AluRRImm12 { - ref mut rd, - ref mut rn, - .. - } => { - mapper.map_def(rd); - mapper.map_use(rn); - } - &mut Inst::AluRRImmLogic { - ref mut rd, - ref mut rn, - .. - } => { - mapper.map_def(rd); - mapper.map_use(rn); - } - &mut Inst::AluRRImmShift { - ref mut rd, - ref mut rn, - .. - } => { - mapper.map_def(rd); - mapper.map_use(rn); - } - &mut Inst::AluRRRShift { - ref mut rd, - ref mut rn, - ref mut rm, - .. - } => { - mapper.map_def(rd); - mapper.map_use(rn); - mapper.map_use(rm); - } - &mut Inst::AluRRRExtend { - ref mut rd, - ref mut rn, - ref mut rm, - .. - } => { - mapper.map_def(rd); - mapper.map_use(rn); - mapper.map_use(rm); - } - &mut Inst::BitRR { - ref mut rd, - ref mut rn, - .. - } => { - mapper.map_def(rd); - mapper.map_use(rn); - } - &mut Inst::ULoad8 { - ref mut rd, - ref mut mem, - .. - } => { - mapper.map_def(rd); - map_mem(mapper, mem); - } - &mut Inst::SLoad8 { - ref mut rd, - ref mut mem, - .. - } => { - mapper.map_def(rd); - map_mem(mapper, mem); - } - &mut Inst::ULoad16 { - ref mut rd, - ref mut mem, - .. - } => { - mapper.map_def(rd); - map_mem(mapper, mem); - } - &mut Inst::SLoad16 { - ref mut rd, - ref mut mem, - .. - } => { - mapper.map_def(rd); - map_mem(mapper, mem); - } - &mut Inst::ULoad32 { - ref mut rd, - ref mut mem, - .. - } => { - mapper.map_def(rd); - map_mem(mapper, mem); - } - &mut Inst::SLoad32 { - ref mut rd, - ref mut mem, - .. - } => { - mapper.map_def(rd); - map_mem(mapper, mem); - } - - &mut Inst::ULoad64 { - ref mut rd, - ref mut mem, - .. - } => { - mapper.map_def(rd); - map_mem(mapper, mem); - } - &mut Inst::Store8 { - ref mut rd, - ref mut mem, - .. - } => { - mapper.map_use(rd); - map_mem(mapper, mem); - } - &mut Inst::Store16 { - ref mut rd, - ref mut mem, - .. - } => { - mapper.map_use(rd); - map_mem(mapper, mem); - } - &mut Inst::Store32 { - ref mut rd, - ref mut mem, - .. - } => { - mapper.map_use(rd); - map_mem(mapper, mem); - } - &mut Inst::Store64 { - ref mut rd, - ref mut mem, - .. - } => { - mapper.map_use(rd); - map_mem(mapper, mem); - } - - &mut Inst::StoreP64 { - ref mut rt, - ref mut rt2, - ref mut mem, - .. - } => { - mapper.map_use(rt); - mapper.map_use(rt2); - map_pairmem(mapper, mem); - } - &mut Inst::LoadP64 { - ref mut rt, - ref mut rt2, - ref mut mem, - .. - } => { - mapper.map_def(rt); - mapper.map_def(rt2); - map_pairmem(mapper, mem); - } - &mut Inst::Mov64 { - ref mut rd, - ref mut rm, - } => { - mapper.map_def(rd); - mapper.map_use(rm); - } - &mut Inst::Mov32 { - ref mut rd, - ref mut rm, - } => { - mapper.map_def(rd); - mapper.map_use(rm); - } - &mut Inst::MovZ { ref mut rd, .. } => { - mapper.map_def(rd); - } - &mut Inst::MovN { ref mut rd, .. } => { - mapper.map_def(rd); - } - &mut Inst::MovK { ref mut rd, .. 
} => { - mapper.map_def(rd); - } - &mut Inst::CSel { - ref mut rd, - ref mut rn, - ref mut rm, - .. - } => { - mapper.map_def(rd); - mapper.map_use(rn); - mapper.map_use(rm); - } - &mut Inst::CSet { ref mut rd, .. } | &mut Inst::CSetm { ref mut rd, .. } => { - mapper.map_def(rd); - } - &mut Inst::CCmpImm { ref mut rn, .. } => { - mapper.map_use(rn); - } - &mut Inst::AtomicRMWLoop { .. } => { - // There are no vregs to map in this insn. - } - &mut Inst::AtomicRMW { - ref mut rs, - ref mut rt, - ref mut rn, - .. - } => { - mapper.map_use(rs); - mapper.map_def(rt); - mapper.map_use(rn); - } - &mut Inst::AtomicCAS { - ref mut rs, - ref mut rt, - ref mut rn, - .. - } => { - mapper.map_mod(rs); - mapper.map_use(rt); - mapper.map_use(rn); - } - &mut Inst::AtomicCASLoop { .. } => { - // There are no vregs to map in this insn. - } - &mut Inst::LoadAcquire { - ref mut rt, - ref mut rn, - .. - } => { - mapper.map_def(rt); - mapper.map_use(rn); - } - &mut Inst::StoreRelease { - ref mut rt, - ref mut rn, - .. - } => { - mapper.map_use(rt); - mapper.map_use(rn); - } - &mut Inst::Fence {} => {} - &mut Inst::FpuMove64 { - ref mut rd, - ref mut rn, - } => { - mapper.map_def(rd); - mapper.map_use(rn); - } - &mut Inst::FpuMove128 { - ref mut rd, - ref mut rn, - } => { - mapper.map_def(rd); - mapper.map_use(rn); - } - &mut Inst::FpuMoveFromVec { - ref mut rd, - ref mut rn, - .. - } => { - mapper.map_def(rd); - mapper.map_use(rn); - } - &mut Inst::FpuExtend { - ref mut rd, - ref mut rn, - .. - } => { - mapper.map_def(rd); - mapper.map_use(rn); - } - &mut Inst::FpuRR { - ref mut rd, - ref mut rn, - .. - } => { - mapper.map_def(rd); - mapper.map_use(rn); - } - &mut Inst::FpuRRR { - ref mut rd, - ref mut rn, - ref mut rm, - .. - } => { - mapper.map_def(rd); - mapper.map_use(rn); - mapper.map_use(rm); - } - &mut Inst::FpuRRI { - fpu_op, - ref mut rd, - ref mut rn, - .. - } => { - match fpu_op { - FPUOpRI::UShr32(..) | FPUOpRI::UShr64(..) => mapper.map_def(rd), - FPUOpRI::Sli32(..) | FPUOpRI::Sli64(..) => mapper.map_mod(rd), - } - mapper.map_use(rn); - } - &mut Inst::FpuRRRR { - ref mut rd, - ref mut rn, - ref mut rm, - ref mut ra, - .. - } => { - mapper.map_def(rd); - mapper.map_use(rn); - mapper.map_use(rm); - mapper.map_use(ra); - } - &mut Inst::VecMisc { - ref mut rd, - ref mut rn, - .. - } => { - mapper.map_def(rd); - mapper.map_use(rn); - } - &mut Inst::VecLanes { - ref mut rd, - ref mut rn, - .. - } => { - mapper.map_def(rd); - mapper.map_use(rn); - } - &mut Inst::VecShiftImm { - ref mut rd, - ref mut rn, - .. - } => { - mapper.map_def(rd); - mapper.map_use(rn); - } - &mut Inst::VecExtract { - ref mut rd, - ref mut rn, - ref mut rm, - .. - } => { - mapper.map_def(rd); - mapper.map_use(rn); - mapper.map_use(rm); - } - &mut Inst::VecTbl { - ref mut rd, - ref mut rn, - ref mut rm, - is_extension, - } => { - mapper.map_use(rn); - mapper.map_use(rm); - - if is_extension { - mapper.map_mod(rd); - } else { - mapper.map_def(rd); - } - } - &mut Inst::VecTbl2 { - ref mut rd, - ref mut rn, - ref mut rn2, - ref mut rm, - is_extension, - } => { - mapper.map_use(rn); - mapper.map_use(rn2); - mapper.map_use(rm); - - if is_extension { - mapper.map_mod(rd); - } else { - mapper.map_def(rd); - } - } - &mut Inst::VecLoadReplicate { - ref mut rd, - ref mut rn, - .. - } => { - mapper.map_def(rd); - mapper.map_use(rn); - } - &mut Inst::VecCSel { - ref mut rd, - ref mut rn, - ref mut rm, - .. 
- } => { - mapper.map_def(rd); - mapper.map_use(rn); - mapper.map_use(rm); - } - &mut Inst::FpuCmp32 { - ref mut rn, - ref mut rm, - } => { - mapper.map_use(rn); - mapper.map_use(rm); - } - &mut Inst::FpuCmp64 { - ref mut rn, - ref mut rm, - } => { - mapper.map_use(rn); - mapper.map_use(rm); - } - &mut Inst::FpuLoad32 { - ref mut rd, - ref mut mem, - .. - } => { - mapper.map_def(rd); - map_mem(mapper, mem); - } - &mut Inst::FpuLoad64 { - ref mut rd, - ref mut mem, - .. - } => { - mapper.map_def(rd); - map_mem(mapper, mem); - } - &mut Inst::FpuLoad128 { - ref mut rd, - ref mut mem, - .. - } => { - mapper.map_def(rd); - map_mem(mapper, mem); - } - &mut Inst::FpuStore32 { - ref mut rd, - ref mut mem, - .. - } => { - mapper.map_use(rd); - map_mem(mapper, mem); - } - &mut Inst::FpuStore64 { - ref mut rd, - ref mut mem, - .. - } => { - mapper.map_use(rd); - map_mem(mapper, mem); - } - &mut Inst::FpuStore128 { - ref mut rd, - ref mut mem, - .. - } => { - mapper.map_use(rd); - map_mem(mapper, mem); - } - &mut Inst::FpuLoadP64 { - ref mut rt, - ref mut rt2, - ref mut mem, - .. - } => { - mapper.map_def(rt); - mapper.map_def(rt2); - map_pairmem(mapper, mem); - } - &mut Inst::FpuStoreP64 { - ref mut rt, - ref mut rt2, - ref mut mem, - .. - } => { - mapper.map_use(rt); - mapper.map_use(rt2); - map_pairmem(mapper, mem); - } - &mut Inst::FpuLoadP128 { - ref mut rt, - ref mut rt2, - ref mut mem, - .. - } => { - mapper.map_def(rt); - mapper.map_def(rt2); - map_pairmem(mapper, mem); - } - &mut Inst::FpuStoreP128 { - ref mut rt, - ref mut rt2, - ref mut mem, - .. - } => { - mapper.map_use(rt); - mapper.map_use(rt2); - map_pairmem(mapper, mem); - } - &mut Inst::LoadFpuConst64 { ref mut rd, .. } => { - mapper.map_def(rd); - } - &mut Inst::LoadFpuConst128 { ref mut rd, .. } => { - mapper.map_def(rd); - } - &mut Inst::FpuToInt { - ref mut rd, - ref mut rn, - .. - } => { - mapper.map_def(rd); - mapper.map_use(rn); - } - &mut Inst::IntToFpu { - ref mut rd, - ref mut rn, - .. - } => { - mapper.map_def(rd); - mapper.map_use(rn); - } - &mut Inst::FpuCSel32 { - ref mut rd, - ref mut rn, - ref mut rm, - .. - } => { - mapper.map_def(rd); - mapper.map_use(rn); - mapper.map_use(rm); - } - &mut Inst::FpuCSel64 { - ref mut rd, - ref mut rn, - ref mut rm, - .. - } => { - mapper.map_def(rd); - mapper.map_use(rn); - mapper.map_use(rm); - } - &mut Inst::FpuRound { - ref mut rd, - ref mut rn, - .. - } => { - mapper.map_def(rd); - mapper.map_use(rn); - } - &mut Inst::MovToFpu { - ref mut rd, - ref mut rn, - .. - } => { - mapper.map_def(rd); - mapper.map_use(rn); - } - &mut Inst::FpuMoveFPImm { ref mut rd, .. } => { - mapper.map_def(rd); - } - &mut Inst::MovToVec { - ref mut rd, - ref mut rn, - .. - } => { - mapper.map_mod(rd); - mapper.map_use(rn); - } - &mut Inst::MovFromVec { - ref mut rd, - ref mut rn, - .. - } - | &mut Inst::MovFromVecSigned { - ref mut rd, - ref mut rn, - .. - } => { - mapper.map_def(rd); - mapper.map_use(rn); - } - &mut Inst::VecDup { - ref mut rd, - ref mut rn, - .. - } => { - mapper.map_def(rd); - mapper.map_use(rn); - } - &mut Inst::VecDupFromFpu { - ref mut rd, - ref mut rn, - .. - } => { - mapper.map_def(rd); - mapper.map_use(rn); - } - &mut Inst::VecDupFPImm { ref mut rd, .. } => { - mapper.map_def(rd); - } - &mut Inst::VecDupImm { ref mut rd, .. } => { - mapper.map_def(rd); - } - &mut Inst::VecExtend { - ref mut rd, - ref mut rn, - .. - } => { - mapper.map_def(rd); - mapper.map_use(rn); - } - &mut Inst::VecMovElement { - ref mut rd, - ref mut rn, - .. 
- } => { - mapper.map_mod(rd); - mapper.map_use(rn); - } - &mut Inst::VecRRLong { - ref mut rd, - ref mut rn, - .. - } => { - mapper.map_def(rd); - mapper.map_use(rn); - } - &mut Inst::VecRRNarrow { - ref mut rd, - ref mut rn, - high_half, - .. - } => { - mapper.map_use(rn); - - if high_half { - mapper.map_mod(rd); - } else { - mapper.map_def(rd); - } - } - &mut Inst::VecRRPair { - ref mut rd, - ref mut rn, - .. - } => { - mapper.map_def(rd); - mapper.map_use(rn); - } - &mut Inst::VecRRRLong { - alu_op, - ref mut rd, - ref mut rn, - ref mut rm, - .. - } => { - match alu_op { - VecRRRLongOp::Umlal8 | VecRRRLongOp::Umlal16 | VecRRRLongOp::Umlal32 => { - mapper.map_mod(rd) - } - _ => mapper.map_def(rd), - }; - mapper.map_use(rn); - mapper.map_use(rm); - } - &mut Inst::VecRRPairLong { - ref mut rd, - ref mut rn, - .. - } => { - mapper.map_def(rd); - mapper.map_use(rn); - } - &mut Inst::VecRRR { - alu_op, - ref mut rd, - ref mut rn, - ref mut rm, - .. - } => { - if alu_op == VecALUOp::Bsl { - mapper.map_mod(rd); - } else { - mapper.map_def(rd); - } - mapper.map_use(rn); - mapper.map_use(rm); - } - &mut Inst::MovToNZCV { ref mut rn } => { - mapper.map_use(rn); - } - &mut Inst::MovFromNZCV { ref mut rd } => { - mapper.map_def(rd); - } - &mut Inst::Extend { - ref mut rd, - ref mut rn, - .. - } => { - mapper.map_def(rd); - mapper.map_use(rn); - } - &mut Inst::Jump { .. } => {} - &mut Inst::Call { ref mut info } => { - for r in info.uses.iter_mut() { - mapper.map_use(r); - } - for r in info.defs.iter_mut() { - mapper.map_def(r); - } - } - &mut Inst::Ret | &mut Inst::EpiloguePlaceholder => {} - &mut Inst::CallInd { ref mut info, .. } => { - for r in info.uses.iter_mut() { - mapper.map_use(r); - } - for r in info.defs.iter_mut() { - mapper.map_def(r); - } - mapper.map_use(&mut info.rn); - } - &mut Inst::CondBr { ref mut kind, .. } => { - map_br(mapper, kind); - } - &mut Inst::IndirectBr { ref mut rn, .. } => { - mapper.map_use(rn); - } - &mut Inst::Nop0 | &mut Inst::Nop4 | &mut Inst::Brk | &mut Inst::Udf { .. } => {} - &mut Inst::TrapIf { ref mut kind, .. } => { - map_br(mapper, kind); - } - &mut Inst::Adr { ref mut rd, .. } => { - mapper.map_def(rd); - } - &mut Inst::Word4 { .. } | &mut Inst::Word8 { .. } => {} - &mut Inst::JTSequence { - ref mut ridx, - ref mut rtmp1, - ref mut rtmp2, - .. - } => { - mapper.map_use(ridx); - mapper.map_def(rtmp1); - mapper.map_def(rtmp2); - } - &mut Inst::LoadExtName { ref mut rd, .. } => { - mapper.map_def(rd); - } - &mut Inst::LoadAddr { - ref mut rd, - ref mut mem, - } => { - mapper.map_def(rd); - map_mem(mapper, mem); - } - &mut Inst::VirtualSPOffsetAdj { .. } => {} - &mut Inst::EmitIsland { .. } => {} - &mut Inst::ElfTlsGetAddr { .. } => {} - &mut Inst::ValueLabelMarker { ref mut reg, .. } => { - mapper.map_use(reg); - } - &mut Inst::Unwind { .. 
} => {} - } } //============================================================================= @@ -1869,12 +1037,8 @@ pub fn aarch64_map_regs(inst: &mut Inst, mapper: &RM) { impl MachInst for Inst { type LabelUse = LabelUse; - fn get_regs(&self, collector: &mut RegUsageCollector) { - aarch64_get_regs(self, collector) - } - - fn map_regs(&mut self, mapper: &RM) { - aarch64_map_regs(self, mapper); + fn get_operands<F: Fn(VReg) -> VReg>(&self, collector: &mut OperandCollector<'_, F>) { + aarch64_get_operands(self, collector); } fn is_move(&self) -> Option<(Writable<Reg>, Reg)> { @@ -1914,7 +1078,7 @@ impl MachInst for Inst { fn is_term<'a>(&'a self) -> MachTerminator<'a> { match self { - &Inst::Ret | &Inst::EpiloguePlaceholder => MachTerminator::Ret, + &Inst::Ret { .. } | &Inst::EpiloguePlaceholder => MachTerminator::Ret, &Inst::Jump { dest } => MachTerminator::Uncond(dest.as_label().unwrap()), &Inst::CondBr { taken, not_taken, .. @@ -1931,27 +1095,35 @@ impl MachInst for Inst { let bits = ty.bits(); assert!(bits <= 128); - assert!(to_reg.to_reg().get_class() == from_reg.get_class()); - - if from_reg.get_class() == RegClass::I64 { - Inst::Mov64 { + assert!(to_reg.to_reg().class() == from_reg.class()); + match from_reg.class() { + RegClass::Int => Inst::Mov64 { rd: to_reg, rm: from_reg, - } - } else if from_reg.get_class() == RegClass::V128 { - if bits > 64 { - Inst::FpuMove128 { - rd: to_reg, - rn: from_reg, - } - } else { - Inst::FpuMove64 { - rd: to_reg, - rn: from_reg, + }, + RegClass::Float => { + if bits > 64 { + Inst::FpuMove128 { + rd: to_reg, + rn: from_reg, + } + } else { + Inst::FpuMove64 { + rd: to_reg, + rn: from_reg, + } } } - } else { - panic!("Unexpected register class: {:?}", from_reg.get_class()); + } + } + + fn is_safepoint(&self) -> bool { + match self { + &Inst::Call { .. } | &Inst::CallInd { .. } | &Inst::TrapIf { .. } | &Inst::Udf { ..
} => true, + _ => false, } } @@ -1973,6 +1145,10 @@ impl MachInst for Inst { } } + fn gen_dummy_use(reg: Reg) -> Inst { + Inst::DummyUse { reg } + } + fn gen_nop(preferred_size: usize) -> Inst { if preferred_size == 0 { return Inst::Nop0; @@ -1982,32 +1158,28 @@ impl MachInst for Inst { Inst::Nop4 } - fn maybe_direct_reload(&self, _reg: VirtualReg, _slot: SpillSlot) -> Option { - None - } - fn rc_for_type(ty: Type) -> CodegenResult<(&'static [RegClass], &'static [Type])> { match ty { - I8 => Ok((&[RegClass::I64], &[I8])), - I16 => Ok((&[RegClass::I64], &[I16])), - I32 => Ok((&[RegClass::I64], &[I32])), - I64 => Ok((&[RegClass::I64], &[I64])), - B1 => Ok((&[RegClass::I64], &[B1])), - B8 => Ok((&[RegClass::I64], &[B8])), - B16 => Ok((&[RegClass::I64], &[B16])), - B32 => Ok((&[RegClass::I64], &[B32])), - B64 => Ok((&[RegClass::I64], &[B64])), + I8 => Ok((&[RegClass::Int], &[I8])), + I16 => Ok((&[RegClass::Int], &[I16])), + I32 => Ok((&[RegClass::Int], &[I32])), + I64 => Ok((&[RegClass::Int], &[I64])), + B1 => Ok((&[RegClass::Int], &[B1])), + B8 => Ok((&[RegClass::Int], &[B8])), + B16 => Ok((&[RegClass::Int], &[B16])), + B32 => Ok((&[RegClass::Int], &[B32])), + B64 => Ok((&[RegClass::Int], &[B64])), R32 => panic!("32-bit reftype pointer should never be seen on AArch64"), - R64 => Ok((&[RegClass::I64], &[R64])), - F32 => Ok((&[RegClass::V128], &[F32])), - F64 => Ok((&[RegClass::V128], &[F64])), - I128 => Ok((&[RegClass::I64, RegClass::I64], &[I64, I64])), - B128 => Ok((&[RegClass::I64, RegClass::I64], &[B64, B64])), + R64 => Ok((&[RegClass::Int], &[R64])), + F32 => Ok((&[RegClass::Float], &[F32])), + F64 => Ok((&[RegClass::Float], &[F64])), + I128 => Ok((&[RegClass::Int, RegClass::Int], &[I64, I64])), + B128 => Ok((&[RegClass::Int, RegClass::Int], &[B64, B64])), _ if ty.is_vector() => { assert!(ty.bits() <= 128); - Ok((&[RegClass::V128], &[I8X16])) + Ok((&[RegClass::Float], &[I8X16])) } - IFLAGS | FFLAGS => Ok((&[RegClass::I64], &[I64])), + IFLAGS | FFLAGS => Ok((&[RegClass::Int], &[I64])), _ => Err(CodegenError::Unsupported(format!( "Unexpected SSA-value type: {}", ty @@ -2015,6 +1187,13 @@ impl MachInst for Inst { } } + fn canonical_type_for_rc(rc: RegClass) -> Type { + match rc { + RegClass::Float => types::I8X16, + RegClass::Int => types::I64, + } + } + fn gen_jump(target: MachLabel) -> Inst { Inst::Jump { dest: BranchTarget::Label(target), @@ -2033,33 +1212,20 @@ impl MachInst for Inst { } fn ref_type_regclass(_: &settings::Flags) -> RegClass { - RegClass::I64 - } - - fn gen_value_label_marker(label: ValueLabel, reg: Reg) -> Self { - Inst::ValueLabelMarker { label, reg } - } - - fn defines_value_label(&self) -> Option<(ValueLabel, Reg)> { - match self { - Inst::ValueLabelMarker { label, reg } => Some((*label, *reg)), - _ => None, - } + RegClass::Int } } //============================================================================= // Pretty-printing of instructions. 
-fn mem_finalize_for_show( - mem: &AMode, - mb_rru: Option<&RealRegUniverse>, - state: &EmitState, -) -> (String, AMode) { +fn mem_finalize_for_show(mem: &AMode, state: &EmitState) -> (String, AMode) { let (mem_insts, mem) = mem_finalize(0, mem, state); let mut mem_str = mem_insts .into_iter() - .map(|inst| inst.show_rru(mb_rru)) + .map(|inst| { + inst.print_with_state(&mut EmitState::default(), &mut AllocationConsumer::new(&[])) + }) .collect::<Vec<_>>() .join(" ; "); if !mem_str.is_empty() { @@ -2069,14 +1235,10 @@ fn mem_finalize_for_show( (mem_str, mem) } -impl PrettyPrint for Inst { - fn show_rru(&self, mb_rru: Option<&RealRegUniverse>) -> String { - self.pretty_print(mb_rru, &mut EmitState::default()) - } -} - impl Inst { - fn print_with_state(&self, mb_rru: Option<&RealRegUniverse>, state: &mut EmitState) -> String { + fn print_with_state(&self, state: &mut EmitState, allocs: &mut AllocationConsumer) -> String { + let mut empty_allocs = AllocationConsumer::default(); + fn op_name(alu_op: ALUOp) -> &'static str { match alu_op { ALUOp::Add => "add", @@ -2105,6 +1267,9 @@ impl Inst { } } + // N.B.: order of `allocs` consumption (via register + // pretty-printing or memarg.with_allocs()) needs to match the + // order in `aarch64_get_operands` above. match self { &Inst::Nop0 => "nop-zero-len".to_string(), &Inst::Nop4 => "nop".to_string(), @@ -2116,9 +1281,9 @@ impl Inst { rm, } => { let op = op_name(alu_op); - let rd = show_ireg_sized(rd.to_reg(), mb_rru, size); - let rn = show_ireg_sized(rn, mb_rru, size); - let rm = show_ireg_sized(rm, mb_rru, size); + let rd = pretty_print_ireg(rd.to_reg(), size, allocs); + let rn = pretty_print_ireg(rn, size, allocs); + let rm = pretty_print_ireg(rm, size, allocs); format!("{} {}, {}, {}", op, rd, rn, rm) } &Inst::AluRRRR { @@ -2134,10 +1299,10 @@ impl Inst { ALUOp3::MSub32 => ("msub", OperandSize::Size32), ALUOp3::MSub64 => ("msub", OperandSize::Size64), }; - let rd = show_ireg_sized(rd.to_reg(), mb_rru, size); - let rn = show_ireg_sized(rn, mb_rru, size); - let rm = show_ireg_sized(rm, mb_rru, size); - let ra = show_ireg_sized(ra, mb_rru, size); + let rd = pretty_print_ireg(rd.to_reg(), size, allocs); + let rn = pretty_print_ireg(rn, size, allocs); + let rm = pretty_print_ireg(rm, size, allocs); + let ra = pretty_print_ireg(ra, size, allocs); format!("{} {}, {}, {}, {}", op, rd, rn, rm, ra) } @@ -2149,14 +1314,14 @@ impl Inst { ref imm12, } => { let op = op_name(alu_op); - let rd = show_ireg_sized(rd.to_reg(), mb_rru, size); - let rn = show_ireg_sized(rn, mb_rru, size); + let rd = pretty_print_ireg(rd.to_reg(), size, allocs); + let rn = pretty_print_ireg(rn, size, allocs); if imm12.bits == 0 && alu_op == ALUOp::Add && size.is64() { // special-case MOV (used for moving into SP).
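// The `mov` printed just below is the architectural alias: on AArch64 a
// plain register-to-register `mov xd, xm` is an alias of `orr xd, xzr, xm`,
// and register number 31 in an ORR encodes XZR rather than SP. Moves
// involving the stack pointer are therefore encoded as a zero-immediate ADD,
// which disassemblers likewise display as `mov`. For example (assembly,
// illustrative):
//
//     add sp, x27, #0    // shown by a disassembler as: mov sp, x27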
format!("mov {}, {}", rd, rn) } else { - let imm12 = imm12.show_rru(mb_rru); + let imm12 = imm12.pretty_print(0, allocs); format!("{} {}, {}, {}", op, rd, rn, imm12) } } @@ -2168,9 +1333,9 @@ impl Inst { ref imml, } => { let op = op_name(alu_op); - let rd = show_ireg_sized(rd.to_reg(), mb_rru, size); - let rn = show_ireg_sized(rn, mb_rru, size); - let imml = imml.show_rru(mb_rru); + let rd = pretty_print_ireg(rd.to_reg(), size, allocs); + let rn = pretty_print_ireg(rn, size, allocs); + let imml = imml.pretty_print(0, allocs); format!("{} {}, {}, {}", op, rd, rn, imml) } &Inst::AluRRImmShift { @@ -2181,9 +1346,9 @@ impl Inst { ref immshift, } => { let op = op_name(alu_op); - let rd = show_ireg_sized(rd.to_reg(), mb_rru, size); - let rn = show_ireg_sized(rn, mb_rru, size); - let immshift = immshift.show_rru(mb_rru); + let rd = pretty_print_ireg(rd.to_reg(), size, allocs); + let rn = pretty_print_ireg(rn, size, allocs); + let immshift = immshift.pretty_print(0, allocs); format!("{} {}, {}, {}", op, rd, rn, immshift) } &Inst::AluRRRShift { @@ -2195,10 +1360,10 @@ impl Inst { ref shiftop, } => { let op = op_name(alu_op); - let rd = show_ireg_sized(rd.to_reg(), mb_rru, size); - let rn = show_ireg_sized(rn, mb_rru, size); - let rm = show_ireg_sized(rm, mb_rru, size); - let shiftop = shiftop.show_rru(mb_rru); + let rd = pretty_print_ireg(rd.to_reg(), size, allocs); + let rn = pretty_print_ireg(rn, size, allocs); + let rm = pretty_print_ireg(rm, size, allocs); + let shiftop = shiftop.pretty_print(0, allocs); format!("{} {}, {}, {}, {}", op, rd, rn, rm, shiftop) } &Inst::AluRRRExtend { @@ -2210,16 +1375,16 @@ impl Inst { ref extendop, } => { let op = op_name(alu_op); - let rd = show_ireg_sized(rd.to_reg(), mb_rru, size); - let rn = show_ireg_sized(rn, mb_rru, size); - let rm = show_ireg_sized(rm, mb_rru, size); - let extendop = extendop.show_rru(mb_rru); + let rd = pretty_print_ireg(rd.to_reg(), size, allocs); + let rn = pretty_print_ireg(rn, size, allocs); + let rm = pretty_print_ireg(rm, size, allocs); + let extendop = extendop.pretty_print(0, allocs); format!("{} {}, {}, {}, {}", op, rd, rn, rm, extendop) } &Inst::BitRR { op, size, rd, rn } => { let op = op.op_str(); - let rd = show_ireg_sized(rd.to_reg(), mb_rru, size); - let rn = show_ireg_sized(rn, mb_rru, size); + let rd = pretty_print_ireg(rd.to_reg(), size, allocs); + let rn = pretty_print_ireg(rn, size, allocs); format!("{} {}, {}", op, rd, rn) } &Inst::ULoad8 { rd, ref mem, .. } @@ -2229,8 +1394,6 @@ impl Inst { | &Inst::ULoad32 { rd, ref mem, .. } | &Inst::SLoad32 { rd, ref mem, .. } | &Inst::ULoad64 { rd, ref mem, .. } => { - let (mem_str, mem) = mem_finalize_for_show(mem, mb_rru, state); - let is_unscaled = match &mem { &AMode::Unscaled(..) => true, _ => false, @@ -2252,16 +1415,18 @@ impl Inst { (&Inst::ULoad64 { .. }, true) => ("ldur", OperandSize::Size64), _ => unreachable!(), }; - let rd = show_ireg_sized(rd.to_reg(), mb_rru, size); - let mem = mem.show_rru(mb_rru); + + let rd = pretty_print_ireg(rd.to_reg(), size, allocs); + let mem = mem.with_allocs(allocs); + let (mem_str, mem) = mem_finalize_for_show(&mem, state); + let mem = mem.pretty_print_default(); + format!("{}{} {}, {}", mem_str, op, rd, mem) } &Inst::Store8 { rd, ref mem, .. } | &Inst::Store16 { rd, ref mem, .. } | &Inst::Store32 { rd, ref mem, .. } | &Inst::Store64 { rd, ref mem, .. } => { - let (mem_str, mem) = mem_finalize_for_show(mem, mb_rru, state); - let is_unscaled = match &mem { &AMode::Unscaled(..) 
=> true, _ => false, @@ -2277,66 +1442,72 @@ impl Inst { (&Inst::Store64 { .. }, true) => ("stur", OperandSize::Size64), _ => unreachable!(), }; - let rd = show_ireg_sized(rd, mb_rru, size); - let mem = mem.show_rru(mb_rru); + + let rd = pretty_print_ireg(rd, size, allocs); + let mem = mem.with_allocs(allocs); + let (mem_str, mem) = mem_finalize_for_show(&mem, state); + let mem = mem.pretty_print_default(); + format!("{}{} {}, {}", mem_str, op, rd, mem) } &Inst::StoreP64 { rt, rt2, ref mem, .. } => { - let rt = rt.show_rru(mb_rru); - let rt2 = rt2.show_rru(mb_rru); - let mem = mem.show_rru(mb_rru); + let rt = pretty_print_ireg(rt, OperandSize::Size64, allocs); + let rt2 = pretty_print_ireg(rt2, OperandSize::Size64, allocs); + let mem = mem.with_allocs(allocs); + let mem = mem.pretty_print_default(); format!("stp {}, {}, {}", rt, rt2, mem) } &Inst::LoadP64 { rt, rt2, ref mem, .. } => { - let rt = rt.to_reg().show_rru(mb_rru); - let rt2 = rt2.to_reg().show_rru(mb_rru); - let mem = mem.show_rru(mb_rru); + let rt = pretty_print_ireg(rt.to_reg(), OperandSize::Size64, allocs); + let rt2 = pretty_print_ireg(rt2.to_reg(), OperandSize::Size64, allocs); + let mem = mem.with_allocs(allocs); + let mem = mem.pretty_print_default(); format!("ldp {}, {}, {}", rt, rt2, mem) } &Inst::Mov64 { rd, rm } => { - let rd = rd.to_reg().show_rru(mb_rru); - let rm = rm.show_rru(mb_rru); + let rd = pretty_print_ireg(rd.to_reg(), OperandSize::Size64, allocs); + let rm = pretty_print_ireg(rm, OperandSize::Size64, allocs); format!("mov {}, {}", rd, rm) } &Inst::Mov32 { rd, rm } => { - let rd = show_ireg_sized(rd.to_reg(), mb_rru, OperandSize::Size32); - let rm = show_ireg_sized(rm, mb_rru, OperandSize::Size32); + let rd = pretty_print_ireg(rd.to_reg(), OperandSize::Size32, allocs); + let rm = pretty_print_ireg(rm, OperandSize::Size32, allocs); format!("mov {}, {}", rd, rm) } &Inst::MovZ { rd, ref imm, size } => { - let rd = show_ireg_sized(rd.to_reg(), mb_rru, size); - let imm = imm.show_rru(mb_rru); + let rd = pretty_print_ireg(rd.to_reg(), size, allocs); + let imm = imm.pretty_print(0, allocs); format!("movz {}, {}", rd, imm) } &Inst::MovN { rd, ref imm, size } => { - let rd = show_ireg_sized(rd.to_reg(), mb_rru, size); - let imm = imm.show_rru(mb_rru); + let rd = pretty_print_ireg(rd.to_reg(), size, allocs); + let imm = imm.pretty_print(0, allocs); format!("movn {}, {}", rd, imm) } &Inst::MovK { rd, ref imm, size } => { - let rd = show_ireg_sized(rd.to_reg(), mb_rru, size); - let imm = imm.show_rru(mb_rru); + let rd = pretty_print_ireg(rd.to_reg(), size, allocs); + let imm = imm.pretty_print(0, allocs); format!("movk {}, {}", rd, imm) } &Inst::CSel { rd, rn, rm, cond } => { - let rd = rd.to_reg().show_rru(mb_rru); - let rn = rn.show_rru(mb_rru); - let rm = rm.show_rru(mb_rru); - let cond = cond.show_rru(mb_rru); + let rd = pretty_print_ireg(rd.to_reg(), OperandSize::Size64, allocs); + let rn = pretty_print_ireg(rn, OperandSize::Size64, allocs); + let rm = pretty_print_ireg(rm, OperandSize::Size64, allocs); + let cond = cond.pretty_print(0, allocs); format!("csel {}, {}, {}, {}", rd, rn, rm, cond) } &Inst::CSet { rd, cond } => { - let rd = rd.to_reg().show_rru(mb_rru); - let cond = cond.show_rru(mb_rru); + let rd = pretty_print_ireg(rd.to_reg(), OperandSize::Size64, allocs); + let cond = cond.pretty_print(0, allocs); format!("cset {}, {}", rd, cond) } &Inst::CSetm { rd, cond } => { - let rd = rd.to_reg().show_rru(mb_rru); - let cond = cond.show_rru(mb_rru); + let rd = pretty_print_ireg(rd.to_reg(), 
OperandSize::Size64, allocs); + let cond = cond.pretty_print(0, allocs); format!("csetm {}, {}", rd, cond) } &Inst::CCmpImm { @@ -2346,10 +1517,10 @@ impl Inst { nzcv, cond, } => { - let rn = show_ireg_sized(rn, mb_rru, size); - let imm = imm.show_rru(mb_rru); - let nzcv = nzcv.show_rru(mb_rru); - let cond = cond.show_rru(mb_rru); + let rn = pretty_print_ireg(rn, size, allocs); + let imm = imm.pretty_print(0, allocs); + let nzcv = nzcv.pretty_print(0, allocs); + let cond = cond.pretty_print(0, allocs); format!("ccmp {}, {}, {}, {}", rn, imm, nzcv, cond) } &Inst::AtomicRMW { rs, rt, rn, ty, op } => { @@ -2365,9 +1536,9 @@ impl Inst { }; let size = OperandSize::from_ty(ty); - let rs = show_ireg_sized(rs, mb_rru, size); - let rt = show_ireg_sized(rt.to_reg(), mb_rru, size); - let rn = rn.show_rru(mb_rru); + let rs = pretty_print_ireg(rs, size, allocs); + let rt = pretty_print_ireg(rt.to_reg(), size, allocs); + let rn = pretty_print_ireg(rn, OperandSize::Size64, allocs); let ty_suffix = match ty { I8 => "b", @@ -2383,13 +1554,14 @@ impl Inst { _ => "", }; let size = OperandSize::from_ty(ty); - let r_status = show_ireg_sized(xreg(24), mb_rru, OperandSize::Size32); - let r_arg2 = show_ireg_sized(xreg(26), mb_rru, size); - let r_tmp = show_ireg_sized(xreg(27), mb_rru, size); - let mut r_dst = show_ireg_sized(xreg(28), mb_rru, size); + let r_addr = pretty_print_ireg(xreg(25), OperandSize::Size64, allocs); + let r_arg2 = pretty_print_ireg(xreg(26), size, allocs); + let r_status = pretty_print_ireg(xreg(24), OperandSize::Size32, allocs); + let r_tmp = pretty_print_ireg(xreg(27), size, allocs); + let mut r_dst = pretty_print_ireg(xreg(28), size, allocs); let mut loop_str: String = "1: ".to_string(); - loop_str.push_str(&format!("ldaxr{} {}, [x25]; ", ty_suffix, r_tmp)); + loop_str.push_str(&format!("ldaxr{} {}, [{}]; ", ty_suffix, r_tmp, r_addr)); let op_str = match op { inst_common::AtomicRmwOp::Add => "add", @@ -2426,8 +1598,8 @@ impl Inst { loop_str.push_str(&format!("{} {}, {}, {}; ", op_str, r_dst, r_tmp, r_arg2)); } loop_str.push_str(&format!( - "stlxr{} {}, {}, [x25]; ", - ty_suffix, r_status, r_dst + "stlxr{} {}, {}, [{}]; ", + ty_suffix, r_status, r_dst, r_addr )); loop_str.push_str(&format!("cbnz {}, 1b", r_status)); loop_str @@ -2440,9 +1612,9 @@ impl Inst { _ => panic!("Unsupported type: {}", ty), }; let size = OperandSize::from_ty(ty); - let rs = show_ireg_sized(rs.to_reg(), mb_rru, size); - let rt = show_ireg_sized(rt, mb_rru, size); - let rn = rn.show_rru(mb_rru); + let rs = pretty_print_ireg(rs.to_reg(), size, allocs); + let rt = pretty_print_ireg(rt, size, allocs); + let rn = pretty_print_ireg(rn, OperandSize::Size64, allocs); format!("{} {}, {}, [{}]", op, rs, rt, rn) } @@ -2462,8 +1634,8 @@ impl Inst { _ => panic!("Unsupported type: {}", access_ty), }; let size = OperandSize::from_ty(ty); - let rt = show_ireg_sized(rt.to_reg(), mb_rru, size); - let rn = rn.show_rru(mb_rru); + let rn = pretty_print_ireg(rn, OperandSize::Size64, allocs); + let rt = pretty_print_ireg(rt.to_reg(), size, allocs); format!("{} {}, [{}]", op, rt, rn) } &Inst::StoreRelease { @@ -2477,32 +1649,31 @@ impl Inst { _ => panic!("Unsupported type: {}", access_ty), }; let size = OperandSize::from_ty(ty); - let rt = show_ireg_sized(rt, mb_rru, size); - let rn = rn.show_rru(mb_rru); + let rn = pretty_print_ireg(rn, OperandSize::Size64, allocs); + let rt = pretty_print_ireg(rt, size, allocs); format!("{} {}, [{}]", op, rt, rn) } &Inst::Fence {} => { format!("dmb ish") } &Inst::FpuMove64 { rd, rn } => { - let rd = 
show_vreg_scalar(rd.to_reg(), mb_rru, ScalarSize::Size64); - let rn = show_vreg_scalar(rn, mb_rru, ScalarSize::Size64); + let rd = pretty_print_vreg_scalar(rd.to_reg(), ScalarSize::Size64, allocs); + let rn = pretty_print_vreg_scalar(rn, ScalarSize::Size64, allocs); format!("fmov {}, {}", rd, rn) } &Inst::FpuMove128 { rd, rn } => { - let rd = rd.to_reg().show_rru(mb_rru); - let rn = rn.show_rru(mb_rru); + let rd = pretty_print_reg(rd.to_reg(), allocs); + let rn = pretty_print_reg(rn, allocs); format!("mov {}.16b, {}.16b", rd, rn) } &Inst::FpuMoveFromVec { rd, rn, idx, size } => { - let rd = show_vreg_scalar(rd.to_reg(), mb_rru, size.lane_size()); - let rn = show_vreg_element(rn, mb_rru, idx, size); + let rd = pretty_print_vreg_scalar(rd.to_reg(), size.lane_size(), allocs); + let rn = pretty_print_vreg_element(rn, idx as usize, size, allocs); format!("mov {}, {}", rd, rn) } &Inst::FpuExtend { rd, rn, size } => { - let rd = show_vreg_scalar(rd.to_reg(), mb_rru, size); - let rn = show_vreg_scalar(rn, mb_rru, size); - + let rd = pretty_print_vreg_scalar(rd.to_reg(), size, allocs); + let rn = pretty_print_vreg_scalar(rn, size, allocs); format!("fmov {}, {}", rd, rn) } &Inst::FpuRR { fpu_op, rd, rn } => { @@ -2516,8 +1687,8 @@ impl Inst { FPUOp1::Cvt32To64 => ("fcvt", ScalarSize::Size32, ScalarSize::Size64), FPUOp1::Cvt64To32 => ("fcvt", ScalarSize::Size64, ScalarSize::Size32), }; - let rd = show_vreg_scalar(rd.to_reg(), mb_rru, sizedest); - let rn = show_vreg_scalar(rn, mb_rru, sizesrc); + let rd = pretty_print_vreg_scalar(rd.to_reg(), sizedest, allocs); + let rn = pretty_print_vreg_scalar(rn, sizesrc, allocs); format!("{} {}, {}", op, rd, rn) } &Inst::FpuRRR { fpu_op, rd, rn, rm } => { @@ -2539,26 +1710,30 @@ impl Inst { FPUOp2::Sqsub64 => ("sqsub", ScalarSize::Size64), FPUOp2::Uqsub64 => ("uqsub", ScalarSize::Size64), }; - let rd = show_vreg_scalar(rd.to_reg(), mb_rru, size); - let rn = show_vreg_scalar(rn, mb_rru, size); - let rm = show_vreg_scalar(rm, mb_rru, size); + let rd = pretty_print_vreg_scalar(rd.to_reg(), size, allocs); + let rn = pretty_print_vreg_scalar(rn, size, allocs); + let rm = pretty_print_vreg_scalar(rm, size, allocs); format!("{} {}, {}, {}", op, rd, rn, rm) } &Inst::FpuRRI { fpu_op, rd, rn } => { let (op, imm, vector) = match fpu_op { - FPUOpRI::UShr32(imm) => ("ushr", imm.show_rru(mb_rru), true), - FPUOpRI::UShr64(imm) => ("ushr", imm.show_rru(mb_rru), false), - FPUOpRI::Sli32(imm) => ("sli", imm.show_rru(mb_rru), true), - FPUOpRI::Sli64(imm) => ("sli", imm.show_rru(mb_rru), false), + FPUOpRI::UShr32(imm) => ("ushr", imm.pretty_print(0, allocs), true), + FPUOpRI::UShr64(imm) => ("ushr", imm.pretty_print(0, allocs), false), + FPUOpRI::Sli32(imm) => ("sli", imm.pretty_print(0, allocs), true), + FPUOpRI::Sli64(imm) => ("sli", imm.pretty_print(0, allocs), false), }; - let show_vreg_fn: fn(Reg, Option<&RealRegUniverse>) -> String = if vector { - |reg, mb_rru| show_vreg_vector(reg, mb_rru, VectorSize::Size32x2) + let (rd, rn) = if vector { + ( + pretty_print_vreg_vector(rd.to_reg(), VectorSize::Size32x2, allocs), + pretty_print_vreg_vector(rn, VectorSize::Size32x2, allocs), + ) } else { - |reg, mb_rru| show_vreg_scalar(reg, mb_rru, ScalarSize::Size64) + ( + pretty_print_vreg_scalar(rd.to_reg(), ScalarSize::Size64, allocs), + pretty_print_vreg_scalar(rn, ScalarSize::Size64, allocs), + ) }; - let rd = show_vreg_fn(rd.to_reg(), mb_rru); - let rn = show_vreg_fn(rn, mb_rru); format!("{} {}, {}, {}", op, rd, rn, imm) } &Inst::FpuRRRR { @@ -2572,98 +1747,108 @@ impl Inst { 
FPUOp3::MAdd32 => ("fmadd", ScalarSize::Size32), FPUOp3::MAdd64 => ("fmadd", ScalarSize::Size64), }; - let rd = show_vreg_scalar(rd.to_reg(), mb_rru, size); - let rn = show_vreg_scalar(rn, mb_rru, size); - let rm = show_vreg_scalar(rm, mb_rru, size); - let ra = show_vreg_scalar(ra, mb_rru, size); + let rd = pretty_print_vreg_scalar(rd.to_reg(), size, allocs); + let rn = pretty_print_vreg_scalar(rn, size, allocs); + let rm = pretty_print_vreg_scalar(rm, size, allocs); + let ra = pretty_print_vreg_scalar(ra, size, allocs); format!("{} {}, {}, {}, {}", op, rd, rn, rm, ra) } &Inst::FpuCmp32 { rn, rm } => { - let rn = show_vreg_scalar(rn, mb_rru, ScalarSize::Size32); - let rm = show_vreg_scalar(rm, mb_rru, ScalarSize::Size32); + let rn = pretty_print_vreg_scalar(rn, ScalarSize::Size32, allocs); + let rm = pretty_print_vreg_scalar(rm, ScalarSize::Size32, allocs); format!("fcmp {}, {}", rn, rm) } &Inst::FpuCmp64 { rn, rm } => { - let rn = show_vreg_scalar(rn, mb_rru, ScalarSize::Size64); - let rm = show_vreg_scalar(rm, mb_rru, ScalarSize::Size64); + let rn = pretty_print_vreg_scalar(rn, ScalarSize::Size64, allocs); + let rm = pretty_print_vreg_scalar(rm, ScalarSize::Size64, allocs); format!("fcmp {}, {}", rn, rm) } &Inst::FpuLoad32 { rd, ref mem, .. } => { - let rd = show_vreg_scalar(rd.to_reg(), mb_rru, ScalarSize::Size32); - let (mem_str, mem) = mem_finalize_for_show(mem, mb_rru, state); - let mem = mem.show_rru(mb_rru); + let rd = pretty_print_vreg_scalar(rd.to_reg(), ScalarSize::Size32, allocs); + let mem = mem.with_allocs(allocs); + let (mem_str, mem) = mem_finalize_for_show(&mem, state); + let mem = mem.pretty_print_default(); format!("{}ldr {}, {}", mem_str, rd, mem) } &Inst::FpuLoad64 { rd, ref mem, .. } => { - let rd = show_vreg_scalar(rd.to_reg(), mb_rru, ScalarSize::Size64); - let (mem_str, mem) = mem_finalize_for_show(mem, mb_rru, state); - let mem = mem.show_rru(mb_rru); + let rd = pretty_print_vreg_scalar(rd.to_reg(), ScalarSize::Size64, allocs); + let mem = mem.with_allocs(allocs); + let (mem_str, mem) = mem_finalize_for_show(&mem, state); + let mem = mem.pretty_print_default(); format!("{}ldr {}, {}", mem_str, rd, mem) } &Inst::FpuLoad128 { rd, ref mem, .. } => { - let rd = rd.to_reg().show_rru(mb_rru); + let rd = pretty_print_reg(rd.to_reg(), allocs); let rd = "q".to_string() + &rd[1..]; - let (mem_str, mem) = mem_finalize_for_show(mem, mb_rru, state); - let mem = mem.show_rru(mb_rru); + let mem = mem.with_allocs(allocs); + let (mem_str, mem) = mem_finalize_for_show(&mem, state); + let mem = mem.pretty_print_default(); format!("{}ldr {}, {}", mem_str, rd, mem) } &Inst::FpuStore32 { rd, ref mem, .. } => { - let rd = show_vreg_scalar(rd, mb_rru, ScalarSize::Size32); - let (mem_str, mem) = mem_finalize_for_show(mem, mb_rru, state); - let mem = mem.show_rru(mb_rru); + let rd = pretty_print_vreg_scalar(rd, ScalarSize::Size32, allocs); + let mem = mem.with_allocs(allocs); + let (mem_str, mem) = mem_finalize_for_show(&mem, state); + let mem = mem.pretty_print_default(); format!("{}str {}, {}", mem_str, rd, mem) } &Inst::FpuStore64 { rd, ref mem, .. 
} => { - let rd = show_vreg_scalar(rd, mb_rru, ScalarSize::Size64); - let (mem_str, mem) = mem_finalize_for_show(mem, mb_rru, state); - let mem = mem.show_rru(mb_rru); + let rd = pretty_print_vreg_scalar(rd, ScalarSize::Size64, allocs); + let mem = mem.with_allocs(allocs); + let (mem_str, mem) = mem_finalize_for_show(&mem, state); + let mem = mem.pretty_print_default(); format!("{}str {}, {}", mem_str, rd, mem) } &Inst::FpuStore128 { rd, ref mem, .. } => { - let rd = rd.show_rru(mb_rru); + let rd = pretty_print_reg(rd, allocs); let rd = "q".to_string() + &rd[1..]; - let (mem_str, mem) = mem_finalize_for_show(mem, mb_rru, state); - let mem = mem.show_rru(mb_rru); + let mem = mem.with_allocs(allocs); + let (mem_str, mem) = mem_finalize_for_show(&mem, state); + let mem = mem.pretty_print_default(); format!("{}str {}, {}", mem_str, rd, mem) } &Inst::FpuLoadP64 { rt, rt2, ref mem, .. } => { - let rt = show_vreg_scalar(rt.to_reg(), mb_rru, ScalarSize::Size64); - let rt2 = show_vreg_scalar(rt2.to_reg(), mb_rru, ScalarSize::Size64); - let mem = mem.show_rru(mb_rru); + let rt = pretty_print_vreg_scalar(rt.to_reg(), ScalarSize::Size64, allocs); + let rt2 = pretty_print_vreg_scalar(rt2.to_reg(), ScalarSize::Size64, allocs); + let mem = mem.with_allocs(allocs); + let mem = mem.pretty_print_default(); format!("ldp {}, {}, {}", rt, rt2, mem) } &Inst::FpuStoreP64 { rt, rt2, ref mem, .. } => { - let rt = show_vreg_scalar(rt, mb_rru, ScalarSize::Size64); - let rt2 = show_vreg_scalar(rt2, mb_rru, ScalarSize::Size64); - let mem = mem.show_rru(mb_rru); + let rt = pretty_print_vreg_scalar(rt, ScalarSize::Size64, allocs); + let rt2 = pretty_print_vreg_scalar(rt2, ScalarSize::Size64, allocs); + let mem = mem.with_allocs(allocs); + let mem = mem.pretty_print_default(); format!("stp {}, {}, {}", rt, rt2, mem) } &Inst::FpuLoadP128 { rt, rt2, ref mem, .. } => { - let rt = show_vreg_scalar(rt.to_reg(), mb_rru, ScalarSize::Size128); - let rt2 = show_vreg_scalar(rt2.to_reg(), mb_rru, ScalarSize::Size128); - let mem = mem.show_rru(mb_rru); + let rt = pretty_print_vreg_scalar(rt.to_reg(), ScalarSize::Size128, allocs); + let rt2 = pretty_print_vreg_scalar(rt2.to_reg(), ScalarSize::Size128, allocs); + let mem = mem.with_allocs(allocs); + let mem = mem.pretty_print_default(); format!("ldp {}, {}, {}", rt, rt2, mem) } &Inst::FpuStoreP128 { rt, rt2, ref mem, .. 
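// The load/store arms above all follow the same new pattern: first substitute
// regalloc2's chosen registers into the addressing mode, then legalize the
// result for display. A minimal sketch of what `AMode::with_allocs` could look
// like under that assumption (`RegExtended` appears later in this patch;
// `RegReg` is assumed here for illustration; the real impl lives in inst/args.rs):
//
//     fn with_allocs(&self, allocs: &mut AllocationConsumer<'_>) -> AMode {
//         match self {
//             &AMode::RegReg(rn, rm) => AMode::RegReg(allocs.next(rn), allocs.next(rm)),
//             &AMode::RegExtended(rn, rm, op) => {
//                 AMode::RegExtended(allocs.next(rn), allocs.next(rm), op)
//             }
//             // ...each remaining variant maps its register fields through
//             // `allocs.next` in the order they were handed to the operand
//             // collector, so the allocation stream stays in sync.
//             _ => self.clone(), // assuming AMode: Clone for register-free variants
//         }
//     }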
} => { - let rt = show_vreg_scalar(rt, mb_rru, ScalarSize::Size128); - let rt2 = show_vreg_scalar(rt2, mb_rru, ScalarSize::Size128); - let mem = mem.show_rru(mb_rru); + let rt = pretty_print_vreg_scalar(rt, ScalarSize::Size128, allocs); + let rt2 = pretty_print_vreg_scalar(rt2, ScalarSize::Size128, allocs); + let mem = mem.with_allocs(allocs); + let mem = mem.pretty_print_default(); format!("stp {}, {}, {}", rt, rt2, mem) } &Inst::LoadFpuConst64 { rd, const_data } => { - let rd = show_vreg_scalar(rd.to_reg(), mb_rru, ScalarSize::Size64); + let rd = pretty_print_vreg_scalar(rd.to_reg(), ScalarSize::Size64, allocs); format!( "ldr {}, pc+8 ; b 12 ; data.f64 {}", rd, @@ -2671,7 +1856,7 @@ impl Inst { ) } &Inst::LoadFpuConst128 { rd, const_data } => { - let rd = show_vreg_scalar(rd.to_reg(), mb_rru, ScalarSize::Size128); + let rd = pretty_print_vreg_scalar(rd.to_reg(), ScalarSize::Size128, allocs); format!("ldr {}, pc+8 ; b 20 ; data.f128 0x{:032x}", rd, const_data) } &Inst::FpuToInt { op, rd, rn } => { @@ -2685,8 +1870,8 @@ impl Inst { FpuToIntOp::F64ToI64 => ("fcvtzs", ScalarSize::Size64, OperandSize::Size64), FpuToIntOp::F64ToU64 => ("fcvtzu", ScalarSize::Size64, OperandSize::Size64), }; - let rd = show_ireg_sized(rd.to_reg(), mb_rru, sizedest); - let rn = show_vreg_scalar(rn, mb_rru, sizesrc); + let rd = pretty_print_ireg(rd.to_reg(), sizedest, allocs); + let rn = pretty_print_vreg_scalar(rn, sizesrc, allocs); format!("{} {}, {}", op, rd, rn) } &Inst::IntToFpu { op, rd, rn } => { @@ -2700,22 +1885,22 @@ impl Inst { IntToFpuOp::I64ToF64 => ("scvtf", OperandSize::Size64, ScalarSize::Size64), IntToFpuOp::U64ToF64 => ("ucvtf", OperandSize::Size64, ScalarSize::Size64), }; - let rd = show_vreg_scalar(rd.to_reg(), mb_rru, sizedest); - let rn = show_ireg_sized(rn, mb_rru, sizesrc); + let rd = pretty_print_vreg_scalar(rd.to_reg(), sizedest, allocs); + let rn = pretty_print_ireg(rn, sizesrc, allocs); format!("{} {}, {}", op, rd, rn) } &Inst::FpuCSel32 { rd, rn, rm, cond } => { - let rd = show_vreg_scalar(rd.to_reg(), mb_rru, ScalarSize::Size32); - let rn = show_vreg_scalar(rn, mb_rru, ScalarSize::Size32); - let rm = show_vreg_scalar(rm, mb_rru, ScalarSize::Size32); - let cond = cond.show_rru(mb_rru); + let rd = pretty_print_vreg_scalar(rd.to_reg(), ScalarSize::Size32, allocs); + let rn = pretty_print_vreg_scalar(rn, ScalarSize::Size32, allocs); + let rm = pretty_print_vreg_scalar(rm, ScalarSize::Size32, allocs); + let cond = cond.pretty_print(0, allocs); format!("fcsel {}, {}, {}, {}", rd, rn, rm, cond) } &Inst::FpuCSel64 { rd, rn, rm, cond } => { - let rd = show_vreg_scalar(rd.to_reg(), mb_rru, ScalarSize::Size64); - let rn = show_vreg_scalar(rn, mb_rru, ScalarSize::Size64); - let rm = show_vreg_scalar(rm, mb_rru, ScalarSize::Size64); - let cond = cond.show_rru(mb_rru); + let rd = pretty_print_vreg_scalar(rd.to_reg(), ScalarSize::Size64, allocs); + let rn = pretty_print_vreg_scalar(rn, ScalarSize::Size64, allocs); + let rm = pretty_print_vreg_scalar(rm, ScalarSize::Size64, allocs); + let cond = cond.pretty_print(0, allocs); format!("fcsel {}, {}, {}, {}", rd, rn, rm, cond) } &Inst::FpuRound { op, rd, rn } => { @@ -2729,25 +1914,25 @@ impl Inst { FpuRoundMode::Nearest32 => ("frintn", ScalarSize::Size32), FpuRoundMode::Nearest64 => ("frintn", ScalarSize::Size64), }; - let rd = show_vreg_scalar(rd.to_reg(), mb_rru, size); - let rn = show_vreg_scalar(rn, mb_rru, size); + let rd = pretty_print_vreg_scalar(rd.to_reg(), size, allocs); + let rn = pretty_print_vreg_scalar(rn, size, allocs); format!("{} {}, 
{}", inst, rd, rn) } &Inst::MovToFpu { rd, rn, size } => { let operand_size = size.operand_size(); - let rd = show_vreg_scalar(rd.to_reg(), mb_rru, size); - let rn = show_ireg_sized(rn, mb_rru, operand_size); + let rd = pretty_print_vreg_scalar(rd.to_reg(), size, allocs); + let rn = pretty_print_ireg(rn, operand_size, allocs); format!("fmov {}, {}", rd, rn) } &Inst::FpuMoveFPImm { rd, imm, size } => { - let imm = imm.show_rru(mb_rru); - let rd = show_vreg_scalar(rd.to_reg(), mb_rru, size); + let imm = imm.pretty_print(0, allocs); + let rd = pretty_print_vreg_scalar(rd.to_reg(), size, allocs); format!("fmov {}, {}", rd, imm) } &Inst::MovToVec { rd, rn, idx, size } => { - let rd = show_vreg_element(rd.to_reg(), mb_rru, idx, size); - let rn = show_ireg_sized(rn, mb_rru, size.operand_size()); + let rd = pretty_print_vreg_element(rd.to_reg(), idx as usize, size, allocs); + let rn = pretty_print_ireg(rn, size.operand_size(), allocs); format!("mov {}, {}", rd, rn) } &Inst::MovFromVec { rd, rn, idx, size } => { @@ -2758,8 +1943,8 @@ impl Inst { VectorSize::Size64x2 => "mov", _ => unimplemented!(), }; - let rd = show_ireg_sized(rd.to_reg(), mb_rru, size.operand_size()); - let rn = show_vreg_element(rn, mb_rru, idx, size); + let rd = pretty_print_ireg(rd.to_reg(), size.operand_size(), allocs); + let rn = pretty_print_vreg_element(rn, idx as usize, size, allocs); format!("{} {}, {}", op, rd, rn) } &Inst::MovFromVecSigned { @@ -2769,23 +1954,23 @@ impl Inst { size, scalar_size, } => { - let rd = show_ireg_sized(rd.to_reg(), mb_rru, scalar_size); - let rn = show_vreg_element(rn, mb_rru, idx, size); + let rd = pretty_print_ireg(rd.to_reg(), scalar_size, allocs); + let rn = pretty_print_vreg_element(rn, idx as usize, size, allocs); format!("smov {}, {}", rd, rn) } &Inst::VecDup { rd, rn, size } => { - let rd = show_vreg_vector(rd.to_reg(), mb_rru, size); - let rn = show_ireg_sized(rn, mb_rru, size.operand_size()); + let rd = pretty_print_vreg_vector(rd.to_reg(), size, allocs); + let rn = pretty_print_ireg(rn, size.operand_size(), allocs); format!("dup {}, {}", rd, rn) } &Inst::VecDupFromFpu { rd, rn, size } => { - let rd = show_vreg_vector(rd.to_reg(), mb_rru, size); - let rn = show_vreg_element(rn, mb_rru, 0, size); + let rd = pretty_print_vreg_vector(rd.to_reg(), size, allocs); + let rn = pretty_print_vreg_element(rn, 0, size, allocs); format!("dup {}, {}", rd, rn) } &Inst::VecDupFPImm { rd, imm, size } => { - let imm = imm.show_rru(mb_rru); - let rd = show_vreg_vector(rd.to_reg(), mb_rru, size); + let imm = imm.pretty_print(0, allocs); + let rd = pretty_print_vreg_vector(rd.to_reg(), size, allocs); format!("fmov {}, {}", rd, imm) } @@ -2795,9 +1980,9 @@ impl Inst { invert, size, } => { - let imm = imm.show_rru(mb_rru); + let imm = imm.pretty_print(0, allocs); let op = if invert { "mvni" } else { "movi" }; - let rd = show_vreg_vector(rd.to_reg(), mb_rru, size); + let rd = pretty_print_vreg_vector(rd.to_reg(), size, allocs); format!("{} {}, {}", op, rd, imm) } @@ -2845,8 +2030,8 @@ impl Inst { ("uxtl2", VectorSize::Size64x2, VectorSize::Size32x4) } }; - let rd = show_vreg_vector(rd.to_reg(), mb_rru, dest); - let rn = show_vreg_vector(rn, mb_rru, src); + let rd = pretty_print_vreg_vector(rd.to_reg(), dest, allocs); + let rn = pretty_print_vreg_vector(rn, src, allocs); format!("{} {}, {}", op, rd, rn) } &Inst::VecMovElement { @@ -2856,8 +2041,8 @@ impl Inst { src_idx, size, } => { - let rd = show_vreg_element(rd.to_reg(), mb_rru, dest_idx, size); - let rn = show_vreg_element(rn, mb_rru, src_idx, size); + 
let rd = pretty_print_vreg_element(rd.to_reg(), dest_idx as usize, size, allocs); + let rn = pretty_print_vreg_element(rn, src_idx as usize, size, allocs); format!("mov {}, {}", rd, rn) } &Inst::VecRRLong { @@ -2898,8 +2083,8 @@ impl Inst { ("shll2", VectorSize::Size64x2, VectorSize::Size32x4, ", #32") } }; - let rd = show_vreg_vector(rd.to_reg(), mb_rru, rd_size); - let rn = show_vreg_vector(rn, mb_rru, size); + let rd = pretty_print_vreg_vector(rd.to_reg(), rd_size, allocs); + let rn = pretty_print_vreg_vector(rn, size, allocs); format!("{} {}, {}{}", op, rd, rn, suffix) } @@ -2995,8 +2180,8 @@ impl Inst { ("fcvtn2", VectorSize::Size32x4, VectorSize::Size64x2) } }; - let rd = show_vreg_vector(rd.to_reg(), mb_rru, rd_size); - let rn = show_vreg_vector(rn, mb_rru, size); + let rn = pretty_print_vreg_vector(rn, size, allocs); + let rd = pretty_print_vreg_vector(rd.to_reg(), rd_size, allocs); format!("{} {}, {}", op, rd, rn) } @@ -3004,8 +2189,8 @@ impl Inst { let op = match op { VecPairOp::Addp => "addp", }; - let rd = show_vreg_scalar(rd.to_reg(), mb_rru, ScalarSize::Size64); - let rn = show_vreg_vector(rn, mb_rru, VectorSize::Size64x2); + let rd = pretty_print_vreg_scalar(rd.to_reg(), ScalarSize::Size64, allocs); + let rn = pretty_print_vreg_vector(rn, VectorSize::Size64x2, allocs); format!("{} {}, {}", op, rd, rn) } @@ -3024,8 +2209,8 @@ impl Inst { ("uaddlp", VectorSize::Size32x4, VectorSize::Size16x8) } }; - let rd = show_vreg_vector(rd.to_reg(), mb_rru, dest); - let rn = show_vreg_vector(rn, mb_rru, src); + let rd = pretty_print_vreg_vector(rd.to_reg(), dest, allocs); + let rn = pretty_print_vreg_vector(rn, src, allocs); format!("{} {}, {}", op, rd, rn) } @@ -3075,9 +2260,9 @@ impl Inst { VecALUOp::Zip1 => ("zip1", size), VecALUOp::Sqrdmulh => ("sqrdmulh", size), }; - let rd = show_vreg_vector(rd.to_reg(), mb_rru, size); - let rn = show_vreg_vector(rn, mb_rru, size); - let rm = show_vreg_vector(rm, mb_rru, size); + let rd = pretty_print_vreg_vector(rd.to_reg(), size, allocs); + let rn = pretty_print_vreg_vector(rn, size, allocs); + let rm = pretty_print_vreg_vector(rm, size, allocs); format!("{} {}, {}, {}", op, rd, rn, rm) } &Inst::VecRRRLong { @@ -3143,9 +2328,9 @@ impl Inst { ("umlal2", VectorSize::Size64x2, VectorSize::Size32x4) } }; - let rd = show_vreg_vector(rd.to_reg(), mb_rru, dest_size); - let rn = show_vreg_vector(rn, mb_rru, src_size); - let rm = show_vreg_vector(rm, mb_rru, src_size); + let rd = pretty_print_vreg_vector(rd.to_reg(), dest_size, allocs); + let rn = pretty_print_vreg_vector(rn, src_size, allocs); + let rm = pretty_print_vreg_vector(rm, src_size, allocs); format!("{} {}, {}, {}", op, rd, rn, rm) } &Inst::VecMisc { op, rd, rn, size } => { @@ -3185,8 +2370,8 @@ impl Inst { VecMisc2::Fcmle0 => ("fcmle", size, ", #0.0"), VecMisc2::Fcmlt0 => ("fcmlt", size, ", #0.0"), }; - let rd = show_vreg_vector(rd.to_reg(), mb_rru, size); - let rn = show_vreg_vector(rn, mb_rru, size); + let rd = pretty_print_vreg_vector(rd.to_reg(), size, allocs); + let rn = pretty_print_vreg_vector(rn, size, allocs); format!("{} {}, {}{}", op, rd, rn, suffix) } &Inst::VecLanes { op, rd, rn, size } => { @@ -3194,8 +2379,8 @@ impl Inst { VecLanesOp::Uminv => "uminv", VecLanesOp::Addv => "addv", }; - let rd = show_vreg_scalar(rd.to_reg(), mb_rru, size.lane_size()); - let rn = show_vreg_vector(rn, mb_rru, size); + let rd = pretty_print_vreg_scalar(rd.to_reg(), size.lane_size(), allocs); + let rn = pretty_print_vreg_vector(rn, size, allocs); format!("{} {}, {}", op, rd, rn) } &Inst::VecShiftImm { 
@@ -3210,14 +2395,14 @@ impl Inst { VecShiftImmOp::Ushr => "ushr", VecShiftImmOp::Sshr => "sshr", }; - let rd = show_vreg_vector(rd.to_reg(), mb_rru, size); - let rn = show_vreg_vector(rn, mb_rru, size); + let rd = pretty_print_vreg_vector(rd.to_reg(), size, allocs); + let rn = pretty_print_vreg_vector(rn, size, allocs); format!("{} {}, {}, #{}", op, rd, rn, imm) } &Inst::VecExtract { rd, rn, rm, imm4 } => { - let rd = show_vreg_vector(rd.to_reg(), mb_rru, VectorSize::Size8x16); - let rn = show_vreg_vector(rn, mb_rru, VectorSize::Size8x16); - let rm = show_vreg_vector(rm, mb_rru, VectorSize::Size8x16); + let rd = pretty_print_vreg_vector(rd.to_reg(), VectorSize::Size8x16, allocs); + let rn = pretty_print_vreg_vector(rn, VectorSize::Size8x16, allocs); + let rm = pretty_print_vreg_vector(rm, VectorSize::Size8x16, allocs); format!("ext {}, {}, {}, #{}", rd, rn, rm, imm4) } &Inst::VecTbl { @@ -3227,9 +2412,9 @@ impl Inst { is_extension, } => { let op = if is_extension { "tbx" } else { "tbl" }; - let rd = show_vreg_vector(rd.to_reg(), mb_rru, VectorSize::Size8x16); - let rn = show_vreg_vector(rn, mb_rru, VectorSize::Size8x16); - let rm = show_vreg_vector(rm, mb_rru, VectorSize::Size8x16); + let rn = pretty_print_vreg_vector(rn, VectorSize::Size8x16, allocs); + let rm = pretty_print_vreg_vector(rm, VectorSize::Size8x16, allocs); + let rd = pretty_print_vreg_vector(rd.to_reg(), VectorSize::Size8x16, allocs); format!("{} {}, {{ {} }}, {}", op, rd, rn, rm) } &Inst::VecTbl2 { @@ -3240,34 +2425,34 @@ impl Inst { is_extension, } => { let op = if is_extension { "tbx" } else { "tbl" }; - let rd = show_vreg_vector(rd.to_reg(), mb_rru, VectorSize::Size8x16); - let rn = show_vreg_vector(rn, mb_rru, VectorSize::Size8x16); - let rn2 = show_vreg_vector(rn2, mb_rru, VectorSize::Size8x16); - let rm = show_vreg_vector(rm, mb_rru, VectorSize::Size8x16); + let rn = pretty_print_vreg_vector(rn, VectorSize::Size8x16, allocs); + let rn2 = pretty_print_vreg_vector(rn2, VectorSize::Size8x16, allocs); + let rm = pretty_print_vreg_vector(rm, VectorSize::Size8x16, allocs); + let rd = pretty_print_vreg_vector(rd.to_reg(), VectorSize::Size8x16, allocs); format!("{} {}, {{ {}, {} }}, {}", op, rd, rn, rn2, rm) } &Inst::VecLoadReplicate { rd, rn, size, .. } => { - let rd = show_vreg_vector(rd.to_reg(), mb_rru, size); - let rn = rn.show_rru(mb_rru); + let rd = pretty_print_vreg_vector(rd.to_reg(), size, allocs); + let rn = pretty_print_reg(rn, allocs); format!("ld1r {{ {} }}, [{}]", rd, rn) } &Inst::VecCSel { rd, rn, rm, cond } => { - let rd = show_vreg_vector(rd.to_reg(), mb_rru, VectorSize::Size8x16); - let rn = show_vreg_vector(rn, mb_rru, VectorSize::Size8x16); - let rm = show_vreg_vector(rm, mb_rru, VectorSize::Size8x16); - let cond = cond.show_rru(mb_rru); + let rd = pretty_print_vreg_vector(rd.to_reg(), VectorSize::Size8x16, allocs); + let rn = pretty_print_vreg_vector(rn, VectorSize::Size8x16, allocs); + let rm = pretty_print_vreg_vector(rm, VectorSize::Size8x16, allocs); + let cond = cond.pretty_print(0, allocs); format!( "vcsel {}, {}, {}, {} (if-then-else diamond)", rd, rn, rm, cond ) } &Inst::MovToNZCV { rn } => { - let rn = rn.show_rru(mb_rru); + let rn = pretty_print_reg(rn, allocs); format!("msr nzcv, {}", rn) } &Inst::MovFromNZCV { rd } => { - let rd = rd.to_reg().show_rru(mb_rru); + let rd = pretty_print_reg(rd.to_reg(), allocs); format!("mrs {}, nzcv", rd) } &Inst::Extend { @@ -3277,8 +2462,8 @@ impl Inst { from_bits: 1, .. 
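// Note the deliberate reordering in the fcvtn and tbl/tbx arms above: uses
// (`rn`, `rn2`, `rm`) are now printed before the def (`rd`), because
// `AllocationConsumer` replays regalloc2's allocations in exactly the order
// the operands were collected. A hedged sketch of the matching collection
// step for `VecTbl` (helper names as in cranelift's OperandCollector):
//
//     &Inst::VecTbl { rd, rn, rm, .. } => {
//         collector.reg_use(rn); // use #0 -- printed first
//         collector.reg_use(rm); // use #1
//         collector.reg_def(rd); // def  -- printed last
//     }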
} => { - let rd = show_ireg_sized(rd.to_reg(), mb_rru, OperandSize::Size32); - let rn = show_ireg_sized(rn, mb_rru, OperandSize::Size32); + let rd = pretty_print_ireg(rd.to_reg(), OperandSize::Size32, allocs); + let rn = pretty_print_ireg(rn, OperandSize::Size32, allocs); format!("and {}, {}, #1", rd, rn) } &Inst::Extend { @@ -3291,8 +2476,8 @@ impl Inst { // The case of a zero extension from 32 to 64 bits, is implemented // with a "mov" to a 32-bit (W-reg) dest, because this zeroes // the top 32 bits. - let rd = show_ireg_sized(rd.to_reg(), mb_rru, OperandSize::Size32); - let rn = show_ireg_sized(rn, mb_rru, OperandSize::Size32); + let rd = pretty_print_ireg(rd.to_reg(), OperandSize::Size32, allocs); + let rn = pretty_print_ireg(rn, OperandSize::Size32, allocs); format!("mov {}, {}", rd, rn) } &Inst::Extend { @@ -3314,8 +2499,8 @@ impl Inst { }; if op == "sbfx" || op == "ubfx" { let dest_size = OperandSize::from_bits(to_bits); - let rd = show_ireg_sized(rd.to_reg(), mb_rru, dest_size); - let rn = show_ireg_sized(rn, mb_rru, dest_size); + let rd = pretty_print_ireg(rd.to_reg(), dest_size, allocs); + let rn = pretty_print_ireg(rn, dest_size, allocs); format!("{} {}, {}, #0, #{}", op, rd, rn, from_bits) } else { let dest_size = if signed { @@ -3323,20 +2508,20 @@ impl Inst { } else { OperandSize::Size32 }; - let rd = show_ireg_sized(rd.to_reg(), mb_rru, dest_size); - let rn = show_ireg_sized(rn, mb_rru, OperandSize::from_bits(from_bits)); + let rd = pretty_print_ireg(rd.to_reg(), dest_size, allocs); + let rn = pretty_print_ireg(rn, OperandSize::from_bits(from_bits), allocs); format!("{} {}, {}", op, rd, rn) } } &Inst::Call { .. } => format!("bl 0"), &Inst::CallInd { ref info, .. } => { - let rn = info.rn.show_rru(mb_rru); + let rn = pretty_print_reg(info.rn, allocs); format!("blr {}", rn) } - &Inst::Ret => "ret".to_string(), + &Inst::Ret { .. } => "ret".to_string(), &Inst::EpiloguePlaceholder => "epilogue placeholder".to_string(), &Inst::Jump { ref dest } => { - let dest = dest.show_rru(mb_rru); + let dest = dest.pretty_print(0, allocs); format!("b {}", dest) } &Inst::CondBr { @@ -3344,45 +2529,45 @@ impl Inst { ref not_taken, ref kind, } => { - let taken = taken.show_rru(mb_rru); - let not_taken = not_taken.show_rru(mb_rru); + let taken = taken.pretty_print(0, allocs); + let not_taken = not_taken.pretty_print(0, allocs); match kind { &CondBrKind::Zero(reg) => { - let reg = reg.show_rru(mb_rru); + let reg = pretty_print_reg(reg, allocs); format!("cbz {}, {} ; b {}", reg, taken, not_taken) } &CondBrKind::NotZero(reg) => { - let reg = reg.show_rru(mb_rru); + let reg = pretty_print_reg(reg, allocs); format!("cbnz {}, {} ; b {}", reg, taken, not_taken) } &CondBrKind::Cond(c) => { - let c = c.show_rru(mb_rru); + let c = c.pretty_print(0, allocs); format!("b.{} {} ; b {}", c, taken, not_taken) } } } &Inst::IndirectBr { rn, .. } => { - let rn = rn.show_rru(mb_rru); + let rn = pretty_print_reg(rn, allocs); format!("br {}", rn) } &Inst::Brk => "brk #0".to_string(), &Inst::Udf { .. } => "udf".to_string(), &Inst::TrapIf { ref kind, .. 
} => match kind { &CondBrKind::Zero(reg) => { - let reg = reg.show_rru(mb_rru); + let reg = pretty_print_reg(reg, allocs); format!("cbnz {}, 8 ; udf", reg) } &CondBrKind::NotZero(reg) => { - let reg = reg.show_rru(mb_rru); + let reg = pretty_print_reg(reg, allocs); format!("cbz {}, 8 ; udf", reg) } &CondBrKind::Cond(c) => { - let c = c.invert().show_rru(mb_rru); + let c = c.invert().pretty_print(0, allocs); format!("b.{} 8 ; udf", c) } }, &Inst::Adr { rd, off } => { - let rd = rd.show_rru(mb_rru); + let rd = pretty_print_reg(rd.to_reg(), allocs); format!("adr {}, pc+{}", rd, off) } &Inst::Word4 { data } => format!("data.i32 {}", data), @@ -3394,10 +2579,10 @@ impl Inst { rtmp2, .. } => { - let ridx = ridx.show_rru(mb_rru); - let rtmp1 = rtmp1.show_rru(mb_rru); - let rtmp2 = rtmp2.show_rru(mb_rru); - let default_target = info.default_target.show_rru(mb_rru); + let ridx = pretty_print_reg(ridx, allocs); + let rtmp1 = pretty_print_reg(rtmp1.to_reg(), allocs); + let rtmp2 = pretty_print_reg(rtmp2.to_reg(), allocs); + let default_target = info.default_target.pretty_print(0, allocs); format!( concat!( "b.hs {} ; ", @@ -3424,7 +2609,7 @@ impl Inst { ref name, offset, } => { - let rd = rd.show_rru(mb_rru); + let rd = pretty_print_reg(rd.to_reg(), allocs); format!("ldr {}, 8 ; b 12 ; data {:?} + {}", rd, name, offset) } &Inst::LoadAddr { rd, ref mem } => { @@ -3432,10 +2617,14 @@ impl Inst { // this logic between `emit()` and `show_rru()` -- a separate 1-to-N // expansion stage (i.e., legalization, but without the slow edit-in-place // of the existing legalization framework). - let (mem_insts, mem) = mem_finalize(0, mem, state); + let rd = allocs.next_writable(rd); + let mem = mem.with_allocs(allocs); + let (mem_insts, mem) = mem_finalize(0, &mem, state); let mut ret = String::new(); for inst in mem_insts.into_iter() { - ret.push_str(&inst.show_rru(mb_rru)); + ret.push_str( + &inst.print_with_state(&mut EmitState::default(), &mut empty_allocs), + ); } let (reg, index_reg, offset) = match mem { AMode::RegExtended(r, idx, extendop) => (r, Some((idx, extendop)), 0), @@ -3462,10 +2651,14 @@ impl Inst { extendop, }; - ret.push_str(&add.show_rru(mb_rru)); + ret.push_str( + &add.print_with_state(&mut EmitState::default(), &mut empty_allocs), + ); } else if offset == 0 { let mov = Inst::gen_move(rd, reg, I64); - ret.push_str(&mov.show_rru(mb_rru)); + ret.push_str( + &mov.print_with_state(&mut EmitState::default(), &mut empty_allocs), + ); } else if let Some(imm12) = Imm12::maybe_from_u64(abs_offset) { let add = Inst::AluRRImm12 { alu_op, @@ -3474,11 +2667,15 @@ impl Inst { rn: reg, imm12, }; - ret.push_str(&add.show_rru(mb_rru)); + ret.push_str( + &add.print_with_state(&mut EmitState::default(), &mut empty_allocs), + ); } else { let tmp = writable_spilltmp_reg(); for inst in Inst::load_constant(tmp, abs_offset).into_iter() { - ret.push_str(&inst.show_rru(mb_rru)); + ret.push_str( + &inst.print_with_state(&mut EmitState::default(), &mut empty_allocs), + ); } let add = Inst::AluRRR { alu_op, @@ -3487,7 +2684,9 @@ impl Inst { rn: reg, rm: tmp.to_reg(), }; - ret.push_str(&add.show_rru(mb_rru)); + ret.push_str( + &add.print_with_state(&mut EmitState::default(), &mut empty_allocs), + ); } ret } @@ -3500,14 +2699,13 @@ impl Inst { &Inst::ElfTlsGetAddr { ref symbol } => { format!("elf_tls_get_addr {}", symbol) } - - &Inst::ValueLabelMarker { label, reg } => { - format!("value_label {:?}, {}", label, reg.show_rru(mb_rru)) - } - &Inst::Unwind { ref inst } => { format!("unwind {:?}", inst) } + &Inst::DummyUse { reg } 
=> {
+                let reg = pretty_print_reg(reg, allocs);
+                format!("dummy_use {}", reg)
+            }
         }
     }
 }
diff --git a/cranelift/codegen/src/isa/aarch64/inst/regs.rs b/cranelift/codegen/src/isa/aarch64/inst/regs.rs
index 0b4babe04a..ad74d662b4 100644
--- a/cranelift/codegen/src/isa/aarch64/inst/regs.rs
+++ b/cranelift/codegen/src/isa/aarch64/inst/regs.rs
@@ -3,11 +3,13 @@
 use crate::isa::aarch64::inst::OperandSize;
 use crate::isa::aarch64::inst::ScalarSize;
 use crate::isa::aarch64::inst::VectorSize;
+use crate::machinst::AllocationConsumer;
+use crate::machinst::RealReg;
+use crate::machinst::{Reg, RegClass, Writable};
 use crate::settings;
-
-use regalloc::{
-    PrettyPrint, RealRegUniverse, Reg, RegClass, RegClassInfo, Writable, NUM_REG_CLASSES,
-};
+use regalloc2::MachineEnv;
+use regalloc2::PReg;
+use regalloc2::VReg;
 
 use std::string::{String, ToString};
 
@@ -19,40 +21,12 @@ use std::string::{String, ToString};
 /// https://searchfox.org/mozilla-central/source/js/src/jit/arm64/Assembler-arm64.h#103
 pub const PINNED_REG: u8 = 21;
 
-#[rustfmt::skip]
-const XREG_INDICES: [u8; 31] = [
-    // X0 - X7
-    32, 33, 34, 35, 36, 37, 38, 39,
-    // X8 - X15
-    40, 41, 42, 43, 44, 45, 46, 47,
-    // X16, X17
-    58, 59,
-    // X18
-    60,
-    // X19, X20
-    48, 49,
-    // X21, put aside because it's the pinned register.
-    57,
-    // X22 - X28
-    50, 51, 52, 53, 54, 55, 56,
-    // X29 (FP)
-    61,
-    // X30 (LR)
-    62,
-];
-
-const ZERO_REG_INDEX: u8 = 63;
-
-const SP_REG_INDEX: u8 = 64;
-
-/// Get a reference to an X-register (integer register).
+/// Get a reference to an X-register (integer register). Do not use
+/// this for xsp / xzr; we have two special registers for those.
 pub fn xreg(num: u8) -> Reg {
     assert!(num < 31);
-    Reg::new_real(
-        RegClass::I64,
-        /* enc = */ num,
-        /* index = */ XREG_INDICES[num as usize],
-    )
+    let preg = PReg::new(num as usize, RegClass::Int);
+    Reg::from(VReg::new(preg.index(), RegClass::Int))
 }
 
 /// Get a writable reference to an X-register.
@@ -63,7 +37,8 @@ pub fn writable_xreg(num: u8) -> Writable<Reg> {
 /// Get a reference to a V-register (vector/FP register).
 pub fn vreg(num: u8) -> Reg {
     assert!(num < 32);
-    Reg::new_real(RegClass::V128, /* enc = */ num, /* index = */ num)
+    let preg = PReg::new(num as usize, RegClass::Float);
+    Reg::from(VReg::new(preg.index(), RegClass::Float))
 }
 
 /// Get a writable reference to a V-register.
@@ -73,13 +48,8 @@ pub fn writable_vreg(num: u8) -> Writable<Reg> {
 /// Get a reference to the zero-register.
 pub fn zero_reg() -> Reg {
-    // This should be the same as what xreg(31) returns, except that
-    // we use the special index into the register index space.
-    Reg::new_real(
-        RegClass::I64,
-        /* enc = */ 31,
-        /* index = */ ZERO_REG_INDEX,
-    )
+    let preg = PReg::new(31, RegClass::Int);
+    Reg::from(VReg::new(preg.index(), RegClass::Int))
 }
 
 /// Get a writable reference to the zero-register (this discards a result).
@@ -89,16 +59,19 @@ pub fn writable_zero_reg() -> Writable<Reg> {
 /// Get a reference to the stack-pointer register.
 pub fn stack_reg() -> Reg {
-    // XSP (stack) and XZR (zero) are logically different registers which have
-    // the same hardware encoding, and whose meaning, in real aarch64
-    // instructions, is context-dependent. For convenience of
-    // universe-construction and for correct printing, we make them be two
-    // different real registers.
-    Reg::new_real(
-        RegClass::I64,
-        /* enc = */ 31,
-        /* index = */ SP_REG_INDEX,
-    )
+    // XSP (stack) and XZR (zero) are logically different registers
+    // which have the same hardware encoding, and whose meaning, in
+    // real aarch64 instructions, is context-dependent. For extra
+    // correctness assurances and for correct printing, we make them
+    // be two different real registers from a regalloc perspective.
+    //
+    // We represent XZR as if it were xreg(31); XSP is xreg(31 +
+    // 32). The PReg bit-packing allows 6 bits (64 registers) so we
+    // make use of this extra space to distinguish xzr and xsp. We
+    // mask off the 6th bit (hw_enc & 31) to get the actual hardware
+    // register encoding.
+    let preg = PReg::new(31 + 32, RegClass::Int);
+    Reg::from(VReg::new(preg.index(), RegClass::Int))
 }
 
 /// Get a writable reference to the stack-pointer register.
@@ -159,158 +132,193 @@ pub fn writable_tmp2_reg() -> Writable<Reg> {
 }
 
 /// Create the register universe for AArch64.
-pub fn create_reg_universe(flags: &settings::Flags) -> RealRegUniverse {
-    let mut regs = vec![];
-    let mut allocable_by_class = [None; NUM_REG_CLASSES];
-
-    // Numbering Scheme: we put V-regs first, then X-regs. The X-regs exclude several registers:
-    // x18 (globally reserved for platform-specific purposes), x29 (frame pointer), x30 (link
-    // register), x31 (stack pointer or zero register, depending on context).
-
-    let v_reg_base = 0u8; // in contiguous real-register index space
-    let v_reg_count = 32;
-    for i in 0u8..v_reg_count {
-        let reg = Reg::new_real(
-            RegClass::V128,
-            /* enc = */ i,
-            /* index = */ v_reg_base + i,
-        )
-        .to_real_reg();
-        let name = format!("v{}", i);
-        regs.push((reg, name));
+pub fn create_reg_env(flags: &settings::Flags) -> MachineEnv {
+    fn preg(r: Reg) -> PReg {
+        r.to_real_reg().unwrap().into()
     }
-    let v_reg_last = v_reg_base + v_reg_count - 1;
-
-    // Add the X registers. N.B.: the order here must match the order implied
-    // by XREG_INDICES, ZERO_REG_INDEX, and SP_REG_INDEX above.
-
-    let x_reg_base = 32u8; // in contiguous real-register index space
-    let mut x_reg_count = 0;
-
-    let uses_pinned_reg = flags.enable_pinned_reg();
-
-    for i in 0u8..32u8 {
-        // See above for excluded registers.
-        if i == 16 || i == 17 || i == 18 || i == 29 || i == 30 || i == 31 || i == PINNED_REG {
-            continue;
-        }
-        let reg = Reg::new_real(
-            RegClass::I64,
-            /* enc = */ i,
-            /* index = */ x_reg_base + x_reg_count,
-        )
-        .to_real_reg();
-        let name = format!("x{}", i);
-        regs.push((reg, name));
-        x_reg_count += 1;
-    }
-    let x_reg_last = x_reg_base + x_reg_count - 1;
-
-    allocable_by_class[RegClass::I64.rc_to_usize()] = Some(RegClassInfo {
-        first: x_reg_base as usize,
-        last: x_reg_last as usize,
-        suggested_scratch: Some(XREG_INDICES[19] as usize),
-    });
-    allocable_by_class[RegClass::V128.rc_to_usize()] = Some(RegClassInfo {
-        first: v_reg_base as usize,
-        last: v_reg_last as usize,
-        suggested_scratch: Some(/* V31: */ 31),
-    });
-
-    // Other regs, not available to the allocator.
-    let allocable = if uses_pinned_reg {
-        // The pinned register is not allocatable in this case, so record the length before adding
-        // it.
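// A quick sanity check of the xzr/xsp packing introduced above (hedged
// sketch; `hw_enc` is used the same way by `show_ireg` further down):
//
//     let xzr = zero_reg().to_real_reg().unwrap();
//     let xsp = stack_reg().to_real_reg().unwrap();
//     assert_eq!(xzr.hw_enc(), 31);
//     assert_eq!(xsp.hw_enc(), 63);                     // 31 + 32
//     assert_eq!(xsp.hw_enc() & 31, xzr.hw_enc() & 31); // same hardware encoding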
-        let len = regs.len();
-        regs.push((xreg(PINNED_REG).to_real_reg(), "x21/pinned_reg".to_string()));
-        len
-    } else {
-        regs.push((xreg(PINNED_REG).to_real_reg(), "x21".to_string()));
-        regs.len()
+    let mut env = MachineEnv {
+        preferred_regs_by_class: [
+            vec![
+                preg(xreg(0)),
+                preg(xreg(1)),
+                preg(xreg(2)),
+                preg(xreg(3)),
+                preg(xreg(4)),
+                preg(xreg(5)),
+                preg(xreg(6)),
+                preg(xreg(7)),
+                preg(xreg(8)),
+                preg(xreg(9)),
+                preg(xreg(10)),
+                preg(xreg(11)),
+                preg(xreg(12)),
+                preg(xreg(13)),
+                preg(xreg(14)),
+                preg(xreg(15)),
+                // x16 and x17 are spilltmp and tmp2 (see above).
+                // x19-28 are callee-saved and so not preferred.
+                // x21 is the pinned register (if enabled) and not allocatable if so.
+                // x29 is FP, x30 is LR, x31 is SP/ZR.
+            ],
+            vec![
+                preg(vreg(0)),
+                preg(vreg(1)),
+                preg(vreg(2)),
+                preg(vreg(3)),
+                preg(vreg(4)),
+                preg(vreg(5)),
+                preg(vreg(6)),
+                preg(vreg(7)),
+                preg(vreg(8)),
+                preg(vreg(9)),
+                preg(vreg(10)),
+                preg(vreg(11)),
+                preg(vreg(12)),
+                preg(vreg(13)),
+                preg(vreg(14)),
+                preg(vreg(15)),
+            ],
+        ],
+        non_preferred_regs_by_class: [
+            vec![
+                preg(xreg(19)),
+                preg(xreg(20)),
+                // x21 is pinned reg if enabled; we add to this list below if not.
+                preg(xreg(22)),
+                preg(xreg(23)),
+                preg(xreg(24)),
+                preg(xreg(25)),
+                preg(xreg(26)),
+                preg(xreg(27)),
+                preg(xreg(28)),
+            ],
+            vec![
+                preg(vreg(16)),
+                preg(vreg(17)),
+                preg(vreg(18)),
+                preg(vreg(19)),
+                preg(vreg(20)),
+                preg(vreg(21)),
+                preg(vreg(22)),
+                preg(vreg(23)),
+                preg(vreg(24)),
+                preg(vreg(25)),
+                preg(vreg(26)),
+                preg(vreg(27)),
+                preg(vreg(28)),
+                preg(vreg(29)),
+                preg(vreg(30)),
+                // v31 is the scratch reg, to allow for parallel moves.
+            ],
+        ],
+        scratch_by_class: [
+            // We use tmp2 (x17) as the regalloc scratch register,
+            // used to resolve cyclic parallel moves. This is valid
+            // because tmp2 is never live between regalloc-visible
+            // instructions, only within them (i.e. in expansion into
+            // multiple machine instructions when that
+            // occurs). spilltmp is used for moves to/from spillslots,
+            // but tmp2 never is, so it is available for this
+            // purpose. (Its only other use is in prologue stack
+            // checks, and the prologue is prepended after regalloc
+            // runs.)
+            preg(tmp2_reg()),
+            // We use v31 for Float/Vec-class parallel moves.
+            preg(vreg(31)),
+        ],
+        fixed_stack_slots: vec![],
     };
-    regs.push((xreg(16).to_real_reg(), "x16".to_string()));
-    regs.push((xreg(17).to_real_reg(), "x17".to_string()));
-    regs.push((xreg(18).to_real_reg(), "x18".to_string()));
-    regs.push((fp_reg().to_real_reg(), "fp".to_string()));
-    regs.push((link_reg().to_real_reg(), "lr".to_string()));
-    regs.push((zero_reg().to_real_reg(), "xzr".to_string()));
-    regs.push((stack_reg().to_real_reg(), "sp".to_string()));
-
-    // FIXME JRS 2020Feb06: unfortunately this pushes the number of real regs
-    // to 65, which is potentially inconvenient from a compiler performance
-    // standpoint. We could possibly drop back to 64 by "losing" a vector
-    // register in future.
-
-    // Assert sanity: the indices in the register structs must match their
-    // actual indices in the array.
-    for (i, reg) in regs.iter().enumerate() {
-        assert_eq!(i, reg.0.get_index());
+    if !flags.enable_pinned_reg() {
+        debug_assert_eq!(PINNED_REG, 21); // We assumed this above in hardcoded reg list.
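// The split above mirrors ABI cost: caller-saved x0..x15 / v0..v15 are
// "preferred" (free to clobber, no prologue save), callee-saved x19..x28 /
// v16..v30 are "non-preferred", and one scratch register per class is held
// back so regalloc2 can break cyclic parallel moves. A hedged spot-check
// (`preg` is the local helper defined at the top of `create_reg_env`):
//
//     let env = create_reg_env(&flags);
//     assert_eq!(env.preferred_regs_by_class[0].len(), 16);  // x0..x15
//     assert_eq!(env.scratch_by_class[0], preg(tmp2_reg())); // x17
//     assert_eq!(env.scratch_by_class[1], preg(vreg(31)));   // v31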
+        env.non_preferred_regs_by_class[0].push(preg(xreg(PINNED_REG)));
     }
 
-    RealRegUniverse {
-        regs,
-        allocable,
-        allocable_by_class,
+    env
 }
 
+// PrettyPrint cannot be implemented for Reg; we need to invoke
+// backend-specific functions from higher level (inst, arg, ...)
+// types.
+
+fn show_ireg(reg: RealReg) -> String {
+    match reg.hw_enc() {
+        29 => "fp".to_string(),
+        30 => "lr".to_string(),
+        31 => "xzr".to_string(),
+        63 => "sp".to_string(),
+        x => {
+            debug_assert!(x < 29);
+            format!("x{}", x)
+        }
+    }
+}
+
-/// If `ireg` denotes an I64-classed reg, make a best-effort attempt to show
+fn show_vreg(reg: RealReg) -> String {
+    format!("v{}", reg.hw_enc() & 31)
+}
+
+fn show_reg(reg: Reg) -> String {
+    if let Some(rreg) = reg.to_real_reg() {
+        match rreg.class() {
+            RegClass::Int => show_ireg(rreg),
+            RegClass::Float => show_vreg(rreg),
+        }
+    } else {
+        format!("%{:?}", reg)
+    }
+}
+
+pub fn pretty_print_reg(reg: Reg, allocs: &mut AllocationConsumer<'_>) -> String {
+    let reg = allocs.next(reg);
+    show_reg(reg)
+}
+
+/// If `ireg` denotes an Int-classed reg, make a best-effort attempt to show
 /// its name at the 32-bit size.
-pub fn show_ireg_sized(reg: Reg, mb_rru: Option<&RealRegUniverse>, size: OperandSize) -> String {
-    let mut s = reg.show_rru(mb_rru);
-    if reg.get_class() != RegClass::I64 || !size.is32() {
+pub fn show_ireg_sized(reg: Reg, size: OperandSize) -> String {
+    let mut s = show_reg(reg);
+    if reg.class() != RegClass::Int || !size.is32() {
         // We can't do any better.
         return s;
     }
 
-    if reg.is_real() {
-        // Change (eg) "x42" into "w42" as appropriate
-        if reg.get_class() == RegClass::I64 && size.is32() && s.starts_with("x") {
-            s = "w".to_string() + &s[1..];
-        }
-    } else {
-        // Add a "w" suffix to RegClass::I64 vregs used in a 32-bit role
-        if reg.get_class() == RegClass::I64 && size.is32() {
-            s.push('w');
-        }
+    // Change (eg) "x42" into "w42" as appropriate
+    if reg.class() == RegClass::Int && size.is32() && s.starts_with("x") {
+        s = "w".to_string() + &s[1..];
     }
+
     s
 }
 
 /// Show a vector register used in a scalar context.
-pub fn show_vreg_scalar(reg: Reg, mb_rru: Option<&RealRegUniverse>, size: ScalarSize) -> String {
-    let mut s = reg.show_rru(mb_rru);
-    if reg.get_class() != RegClass::V128 {
+pub fn show_vreg_scalar(reg: Reg, size: ScalarSize) -> String {
+    let mut s = show_reg(reg);
+    if reg.class() != RegClass::Float {
         // We can't do any better.
         return s;
     }
 
-    if reg.is_real() {
-        // Change (eg) "v0" into "d0".
-        if s.starts_with("v") {
-            let replacement = match size {
-                ScalarSize::Size8 => "b",
-                ScalarSize::Size16 => "h",
-                ScalarSize::Size32 => "s",
-                ScalarSize::Size64 => "d",
-                ScalarSize::Size128 => "q",
-            };
-            s.replace_range(0..1, replacement);
-        }
-    } else {
-        // Add a "d" suffix to RegClass::V128 vregs.
-        if reg.get_class() == RegClass::V128 {
-            s.push('d');
-        }
+    // Change (eg) "v0" into "d0".
+    if s.starts_with("v") {
+        let replacement = match size {
+            ScalarSize::Size8 => "b",
+            ScalarSize::Size16 => "h",
+            ScalarSize::Size32 => "s",
+            ScalarSize::Size64 => "d",
+            ScalarSize::Size128 => "q",
+        };
+        s.replace_range(0..1, replacement);
     }
+
     s
 }
 
 /// Show a vector register.
-pub fn show_vreg_vector(reg: Reg, mb_rru: Option<&RealRegUniverse>, size: VectorSize) -> String {
-    assert_eq!(RegClass::V128, reg.get_class());
-    let mut s = reg.show_rru(mb_rru);
+pub fn show_vreg_vector(reg: Reg, size: VectorSize) -> String {
+    assert_eq!(RegClass::Float, reg.class());
+    let mut s = show_reg(reg);
 
     let suffix = match size {
         VectorSize::Size8x8 => ".8b",
@@ -327,25 +335,54 @@ pub fn show_vreg_vector(reg: Reg, size: VectorSize) -> String {
 }
 
 /// Show an indexed vector element.
-pub fn show_vreg_element(
-    reg: Reg,
-    mb_rru: Option<&RealRegUniverse>,
-    idx: u8,
-    size: VectorSize,
-) -> String {
-    assert_eq!(RegClass::V128, reg.get_class());
-    let mut s = reg.show_rru(mb_rru);
-
+pub fn show_vreg_element(reg: Reg, idx: u8, size: VectorSize) -> String {
+    assert_eq!(RegClass::Float, reg.class());
+    let s = show_reg(reg);
     let suffix = match size {
-        VectorSize::Size8x8 => "b",
-        VectorSize::Size8x16 => "b",
-        VectorSize::Size16x4 => "h",
-        VectorSize::Size16x8 => "h",
-        VectorSize::Size32x2 => "s",
-        VectorSize::Size32x4 => "s",
-        VectorSize::Size64x2 => "d",
+        VectorSize::Size8x8 => ".b",
+        VectorSize::Size8x16 => ".b",
+        VectorSize::Size16x4 => ".h",
+        VectorSize::Size16x8 => ".h",
+        VectorSize::Size32x2 => ".s",
+        VectorSize::Size32x4 => ".s",
+        VectorSize::Size64x2 => ".d",
     };
-
-    s.push_str(&format!(".{}[{}]", suffix, idx));
-    s
+    format!("{}{}[{}]", s, suffix, idx)
+}
+
+pub fn pretty_print_ireg(
+    reg: Reg,
+    size: OperandSize,
+    allocs: &mut AllocationConsumer<'_>,
+) -> String {
+    let reg = allocs.next(reg);
+    show_ireg_sized(reg, size)
+}
+
+pub fn pretty_print_vreg_scalar(
+    reg: Reg,
+    size: ScalarSize,
+    allocs: &mut AllocationConsumer<'_>,
+) -> String {
+    let reg = allocs.next(reg);
+    show_vreg_scalar(reg, size)
+}
+
+pub fn pretty_print_vreg_vector(
+    reg: Reg,
+    size: VectorSize,
+    allocs: &mut AllocationConsumer<'_>,
+) -> String {
+    let reg = allocs.next(reg);
+    show_vreg_vector(reg, size)
+}
+
+pub fn pretty_print_vreg_element(
+    reg: Reg,
+    idx: usize,
+    size: VectorSize,
+    allocs: &mut AllocationConsumer<'_>,
+) -> String {
+    let reg = allocs.next(reg);
+    show_vreg_element(reg, idx as u8, size)
 }
diff --git a/cranelift/codegen/src/isa/aarch64/inst/unwind/systemv.rs b/cranelift/codegen/src/isa/aarch64/inst/unwind/systemv.rs
index 12651427b6..77f65862a3 100644
--- a/cranelift/codegen/src/isa/aarch64/inst/unwind/systemv.rs
+++ b/cranelift/codegen/src/isa/aarch64/inst/unwind/systemv.rs
@@ -2,8 +2,8 @@
 use crate::isa::aarch64::inst::regs;
 use crate::isa::unwind::systemv::RegisterMappingError;
+use crate::machinst::{Reg, RegClass};
 use gimli::{write::CommonInformationEntry, Encoding, Format, Register};
-use regalloc::{Reg, RegClass};
 
 /// Creates a new aarch64 common information entry (CIE).
 pub fn create_cie() -> CommonInformationEntry {
@@ -17,11 +17,11 @@ pub fn create_cie() -> CommonInformationEntry {
         },
         4,  // Code alignment factor
         -8, // Data alignment factor
-        Register(regs::link_reg().get_hw_encoding().into()),
+        Register(regs::link_reg().to_real_reg().unwrap().hw_enc().into()),
     );
 
     // Every frame will start with the call frame address (CFA) at SP
-    let sp = Register(regs::stack_reg().get_hw_encoding().into());
+    let sp = Register((regs::stack_reg().to_real_reg().unwrap().hw_enc() & 31).into());
     entry.add_instruction(CallFrameInstruction::Cfa(sp, 0));
 
     entry
@@ -34,16 +34,15 @@ pub fn map_reg(reg: Reg) -> Result<Register, RegisterMappingError> {
     // https://developer.arm.com/documentation/ihi0057/e/?lang=en#dwarf-register-names
     //
    // X0--X31 is 0--31; V0--V31 is 64--95.
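// Given that numbering, a hedged spot-check of `map_reg` (gimli's `Register`
// is a thin wrapper around the DWARF register number):
//
//     assert_eq!(map_reg(regs::xreg(0)).unwrap().0, 0);      // X0 -> 0
//     assert_eq!(map_reg(regs::stack_reg()).unwrap().0, 31); // SP: hw_enc & 31
//     assert_eq!(map_reg(regs::vreg(0)).unwrap().0, 64);     // V0 -> 64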
-    match reg.get_class() {
-        RegClass::I64 => {
-            let reg = reg.get_hw_encoding() as u16;
+    match reg.class() {
+        RegClass::Int => {
+            let reg = (reg.to_real_reg().unwrap().hw_enc() & 31) as u16;
             Ok(Register(reg))
         }
-        RegClass::V128 => {
-            let reg = reg.get_hw_encoding() as u16;
+        RegClass::Float => {
+            let reg = reg.to_real_reg().unwrap().hw_enc() as u16;
             Ok(Register(64 + reg))
         }
-        _ => Err(RegisterMappingError::UnsupportedRegisterBank("class?")),
     }
 }
@@ -54,13 +53,13 @@ impl crate::isa::unwind::systemv::RegisterMapper<Reg> for RegisterMapper {
         Ok(map_reg(reg)?.0)
     }
     fn sp(&self) -> u16 {
-        regs::stack_reg().get_hw_encoding().into()
+        (regs::stack_reg().to_real_reg().unwrap().hw_enc() & 31).into()
     }
     fn fp(&self) -> Option<u16> {
-        Some(regs::fp_reg().get_hw_encoding().into())
+        Some(regs::fp_reg().to_real_reg().unwrap().hw_enc().into())
     }
     fn lr(&self) -> Option<u16> {
-        Some(regs::link_reg().get_hw_encoding().into())
+        Some(regs::link_reg().to_real_reg().unwrap().hw_enc().into())
     }
     fn lr_offset(&self) -> Option<u32> {
         Some(8)
diff --git a/cranelift/codegen/src/isa/aarch64/lower.rs b/cranelift/codegen/src/isa/aarch64/lower.rs
index 6ff8e0dedf..c84ed6afb5 100644
--- a/cranelift/codegen/src/isa/aarch64/lower.rs
+++ b/cranelift/codegen/src/isa/aarch64/lower.rs
@@ -7,21 +7,18 @@
 //!
 //! - Floating-point immediates (FIMM instruction).
 
+use super::lower_inst;
+use crate::data_value::DataValue;
 use crate::ir::condcodes::{FloatCC, IntCC};
 use crate::ir::types::*;
 use crate::ir::Inst as IRInst;
 use crate::ir::{Opcode, Type, Value};
-use crate::machinst::lower::*;
-use crate::machinst::*;
-use crate::{CodegenError, CodegenResult};
-
 use crate::isa::aarch64::inst::*;
 use crate::isa::aarch64::AArch64Backend;
-
-use super::lower_inst;
-
-use crate::data_value::DataValue;
-use regalloc::{Reg, Writable};
+use crate::machinst::lower::*;
+use crate::machinst::*;
+use crate::machinst::{Reg, Writable};
+use crate::{CodegenError, CodegenResult};
 use smallvec::SmallVec;
 use std::cmp;
 
diff --git a/cranelift/codegen/src/isa/aarch64/lower/isle.rs b/cranelift/codegen/src/isa/aarch64/lower/isle.rs
index 73e5b6d4b7..da397aa660 100644
--- a/cranelift/codegen/src/isa/aarch64/lower/isle.rs
+++ b/cranelift/codegen/src/isa/aarch64/lower/isle.rs
@@ -17,9 +17,8 @@ use crate::{
     binemit::CodeOffset,
     ir::{
         immediates::*, types::*, ExternalName, Inst, InstructionData, MemFlags, TrapCode, Value,
-        ValueLabel, ValueList,
+        ValueList,
     },
-    isa::aarch64::inst::aarch64_map_regs,
     isa::aarch64::inst::args::{ShiftOp, ShiftOpShiftImm},
     isa::unwind::UnwindInst,
     machinst::{ty_bits, InsnOutput, LowerCtx},
@@ -45,15 +44,9 @@ pub(crate) fn lower<C>(
 where
     C: LowerCtx<I = MInst>,
 {
-    lower_common(
-        lower_ctx,
-        flags,
-        isa_flags,
-        outputs,
-        inst,
-        |cx, insn| generated_code::constructor_lower(cx, insn),
-        aarch64_map_regs,
-    )
+    lower_common(lower_ctx, flags, isa_flags, outputs, inst, |cx, insn| {
+        generated_code::constructor_lower(cx, insn)
+    })
 }
 
 pub struct ExtendedValue {
@@ -200,11 +193,7 @@ where
     }
 
     fn emit(&mut self, inst: &MInst) -> Unit {
-        self.emitted_insts.push((inst.clone(), false));
-    }
-
-    fn emit_safepoint(&mut self, inst: &MInst) -> Unit {
-        self.emitted_insts.push((inst.clone(), true));
+        self.lower_ctx.emit(inst.clone());
     }
 
     fn cond_br_zero(&mut self, reg: Reg) -> CondBrKind {
diff --git a/cranelift/codegen/src/isa/aarch64/lower/isle/generated_code.manifest b/cranelift/codegen/src/isa/aarch64/lower/isle/generated_code.manifest
index 08ec6024da..0d989e229c 100644
--- a/cranelift/codegen/src/isa/aarch64/lower/isle/generated_code.manifest
+++
b/cranelift/codegen/src/isa/aarch64/lower/isle/generated_code.manifest @@ -1,4 +1,4 @@ src/clif.isle 443b34b797fc8ace -src/prelude.isle c0751050a11e2686 -src/isa/aarch64/inst.isle 19ccefb6a496d392 +src/prelude.isle afd037c4d91c875c +src/isa/aarch64/inst.isle 544b7126192140d5 src/isa/aarch64/lower.isle d88b62dd6b40622 diff --git a/cranelift/codegen/src/isa/aarch64/lower/isle/generated_code.rs b/cranelift/codegen/src/isa/aarch64/lower/isle/generated_code.rs index a73e90405b..684aa0be22 100644 --- a/cranelift/codegen/src/isa/aarch64/lower/isle/generated_code.rs +++ b/cranelift/codegen/src/isa/aarch64/lower/isle/generated_code.rs @@ -79,7 +79,6 @@ pub trait Context { fn def_inst(&mut self, arg0: Value) -> Option; fn offset32_to_u32(&mut self, arg0: Offset32) -> u32; fn emit(&mut self, arg0: &MInst) -> Unit; - fn emit_safepoint(&mut self, arg0: &MInst) -> Unit; fn trap_code_division_by_zero(&mut self) -> TrapCode; fn trap_code_integer_overflow(&mut self) -> TrapCode; fn trap_code_bad_conversion_to_integer(&mut self) -> TrapCode; @@ -132,13 +131,13 @@ pub trait Context { fn rotr_opposite_amount(&mut self, arg0: Type, arg1: ImmShift) -> ImmShift; } -/// Internal type SideEffectNoResult: defined at src/prelude.isle line 405. +/// Internal type SideEffectNoResult: defined at src/prelude.isle line 402. #[derive(Clone, Debug)] pub enum SideEffectNoResult { Inst { inst: MInst }, } -/// Internal type ProducesFlags: defined at src/prelude.isle line 427. +/// Internal type ProducesFlags: defined at src/prelude.isle line 418. #[derive(Clone, Debug)] pub enum ProducesFlags { ProducesFlagsSideEffect { inst: MInst }, @@ -146,7 +145,7 @@ pub enum ProducesFlags { ProducesFlagsReturnsResultWithConsumer { inst: MInst, result: Reg }, } -/// Internal type ConsumesFlags: defined at src/prelude.isle line 438. +/// Internal type ConsumesFlags: defined at src/prelude.isle line 429. #[derive(Clone, Debug)] pub enum ConsumesFlags { ConsumesFlagsReturnsResultWithProducer { @@ -681,7 +680,9 @@ pub enum MInst { CallInd { info: BoxCallIndInfo, }, - Ret, + Ret { + rets: VecReg, + }, EpiloguePlaceholder, Jump { dest: BranchTarget, @@ -737,16 +738,15 @@ pub enum MInst { ElfTlsGetAddr { symbol: ExternalName, }, - ValueLabelMarker { - reg: Reg, - label: ValueLabel, - }, Unwind { inst: UnwindInst, }, + DummyUse { + reg: Reg, + }, } -/// Internal type ALUOp: defined at src/isa/aarch64/inst.isle line 796. +/// Internal type ALUOp: defined at src/isa/aarch64/inst.isle line 795. #[derive(Copy, Clone, PartialEq, Eq, Debug)] pub enum ALUOp { Add, @@ -774,7 +774,7 @@ pub enum ALUOp { SbcS, } -/// Internal type ALUOp3: defined at src/isa/aarch64/inst.isle line 834. +/// Internal type ALUOp3: defined at src/isa/aarch64/inst.isle line 833. #[derive(Copy, Clone, PartialEq, Eq, Debug)] pub enum ALUOp3 { MAdd32, @@ -783,7 +783,7 @@ pub enum ALUOp3 { MSub64, } -/// Internal type BitOp: defined at src/isa/aarch64/inst.isle line 877. +/// Internal type BitOp: defined at src/isa/aarch64/inst.isle line 876. #[derive(Copy, Clone, PartialEq, Eq, Debug)] pub enum BitOp { RBit, @@ -791,7 +791,7 @@ pub enum BitOp { Cls, } -/// Internal type FPUOp1: defined at src/isa/aarch64/inst.isle line 944. +/// Internal type FPUOp1: defined at src/isa/aarch64/inst.isle line 943. #[derive(Copy, Clone, PartialEq, Eq, Debug)] pub enum FPUOp1 { Abs32, @@ -804,7 +804,7 @@ pub enum FPUOp1 { Cvt64To32, } -/// Internal type FPUOp2: defined at src/isa/aarch64/inst.isle line 957. +/// Internal type FPUOp2: defined at src/isa/aarch64/inst.isle line 956. 
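// Unlike regalloc.rs, regalloc2 has no implicit notion of ABI live-outs, so
// `Ret` above now carries its return-value registers explicitly (`rets`) as
// uses that keep those values alive up to the return. A hedged sketch of the
// matching operand collection:
//
//     &Inst::Ret { ref rets } => {
//         for &ret in rets {
//             collector.reg_use(ret); // already pinned to its ABI return reg
//         }
//     }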
#[derive(Copy, Clone, PartialEq, Eq, Debug)] pub enum FPUOp2 { Add32, @@ -825,14 +825,14 @@ pub enum FPUOp2 { Uqsub64, } -/// Internal type FPUOp3: defined at src/isa/aarch64/inst.isle line 982. +/// Internal type FPUOp3: defined at src/isa/aarch64/inst.isle line 981. #[derive(Copy, Clone, PartialEq, Eq, Debug)] pub enum FPUOp3 { MAdd32, MAdd64, } -/// Internal type FpuToIntOp: defined at src/isa/aarch64/inst.isle line 989. +/// Internal type FpuToIntOp: defined at src/isa/aarch64/inst.isle line 988. #[derive(Copy, Clone, PartialEq, Eq, Debug)] pub enum FpuToIntOp { F32ToU32, @@ -845,7 +845,7 @@ pub enum FpuToIntOp { F64ToI64, } -/// Internal type IntToFpuOp: defined at src/isa/aarch64/inst.isle line 1002. +/// Internal type IntToFpuOp: defined at src/isa/aarch64/inst.isle line 1001. #[derive(Copy, Clone, PartialEq, Eq, Debug)] pub enum IntToFpuOp { U32ToF32, @@ -858,7 +858,7 @@ pub enum IntToFpuOp { I64ToF64, } -/// Internal type FpuRoundMode: defined at src/isa/aarch64/inst.isle line 1016. +/// Internal type FpuRoundMode: defined at src/isa/aarch64/inst.isle line 1015. #[derive(Copy, Clone, PartialEq, Eq, Debug)] pub enum FpuRoundMode { Minus32, @@ -871,7 +871,7 @@ pub enum FpuRoundMode { Nearest64, } -/// Internal type VecExtendOp: defined at src/isa/aarch64/inst.isle line 1029. +/// Internal type VecExtendOp: defined at src/isa/aarch64/inst.isle line 1028. #[derive(Copy, Clone, PartialEq, Eq, Debug)] pub enum VecExtendOp { Sxtl8, @@ -882,7 +882,7 @@ pub enum VecExtendOp { Uxtl32, } -/// Internal type VecALUOp: defined at src/isa/aarch64/inst.isle line 1046. +/// Internal type VecALUOp: defined at src/isa/aarch64/inst.isle line 1045. #[derive(Copy, Clone, PartialEq, Eq, Debug)] pub enum VecALUOp { Sqadd, @@ -924,7 +924,7 @@ pub enum VecALUOp { Sqrdmulh, } -/// Internal type VecMisc2: defined at src/isa/aarch64/inst.isle line 1125. +/// Internal type VecMisc2: defined at src/isa/aarch64/inst.isle line 1124. #[derive(Copy, Clone, PartialEq, Eq, Debug)] pub enum VecMisc2 { Not, @@ -955,7 +955,7 @@ pub enum VecMisc2 { Fcmlt0, } -/// Internal type VecRRLongOp: defined at src/isa/aarch64/inst.isle line 1182. +/// Internal type VecRRLongOp: defined at src/isa/aarch64/inst.isle line 1181. #[derive(Copy, Clone, PartialEq, Eq, Debug)] pub enum VecRRLongOp { Fcvtl16, @@ -965,7 +965,7 @@ pub enum VecRRLongOp { Shll32, } -/// Internal type VecRRNarrowOp: defined at src/isa/aarch64/inst.isle line 1197. +/// Internal type VecRRNarrowOp: defined at src/isa/aarch64/inst.isle line 1196. #[derive(Copy, Clone, PartialEq, Eq, Debug)] pub enum VecRRNarrowOp { Xtn16, @@ -984,7 +984,7 @@ pub enum VecRRNarrowOp { Fcvtn64, } -/// Internal type VecRRRLongOp: defined at src/isa/aarch64/inst.isle line 1229. +/// Internal type VecRRRLongOp: defined at src/isa/aarch64/inst.isle line 1228. #[derive(Copy, Clone, PartialEq, Eq, Debug)] pub enum VecRRRLongOp { Smull8, @@ -998,13 +998,13 @@ pub enum VecRRRLongOp { Umlal32, } -/// Internal type VecPairOp: defined at src/isa/aarch64/inst.isle line 1246. +/// Internal type VecPairOp: defined at src/isa/aarch64/inst.isle line 1245. #[derive(Copy, Clone, PartialEq, Eq, Debug)] pub enum VecPairOp { Addp, } -/// Internal type VecRRPairLongOp: defined at src/isa/aarch64/inst.isle line 1254. +/// Internal type VecRRPairLongOp: defined at src/isa/aarch64/inst.isle line 1253. 
#[derive(Copy, Clone, PartialEq, Eq, Debug)] pub enum VecRRPairLongOp { Saddlp8, @@ -1013,14 +1013,14 @@ pub enum VecRRPairLongOp { Uaddlp16, } -/// Internal type VecLanesOp: defined at src/isa/aarch64/inst.isle line 1265. +/// Internal type VecLanesOp: defined at src/isa/aarch64/inst.isle line 1264. #[derive(Copy, Clone, PartialEq, Eq, Debug)] pub enum VecLanesOp { Addv, Uminv, } -/// Internal type VecShiftImmOp: defined at src/isa/aarch64/inst.isle line 1274. +/// Internal type VecShiftImmOp: defined at src/isa/aarch64/inst.isle line 1273. #[derive(Copy, Clone, PartialEq, Eq, Debug)] pub enum VecShiftImmOp { Shl, @@ -1028,7 +1028,7 @@ pub enum VecShiftImmOp { Sshr, } -/// Internal type AtomicRMWOp: defined at src/isa/aarch64/inst.isle line 1285. +/// Internal type AtomicRMWOp: defined at src/isa/aarch64/inst.isle line 1284. #[derive(Copy, Clone, PartialEq, Eq, Debug)] pub enum AtomicRMWOp { Add, @@ -1088,7 +1088,7 @@ pub fn constructor_side_effect( inst: ref pattern1_0, } = pattern0_0 { - // Rule at src/prelude.isle line 410. + // Rule at src/prelude.isle line 407. let expr0_0 = C::emit(ctx, pattern1_0); let expr1_0 = C::output_none(ctx); return Some(expr1_0); @@ -1096,24 +1096,6 @@ pub fn constructor_side_effect( return None; } -// Generated as internal constructor for term safepoint. -pub fn constructor_safepoint( - ctx: &mut C, - arg0: &SideEffectNoResult, -) -> Option { - let pattern0_0 = arg0; - if let &SideEffectNoResult::Inst { - inst: ref pattern1_0, - } = pattern0_0 - { - // Rule at src/prelude.isle line 416. - let expr0_0 = C::emit_safepoint(ctx, pattern1_0); - let expr1_0 = C::output_none(ctx); - return Some(expr1_0); - } - return None; -} - // Generated as internal constructor for term produces_flags_get_reg. pub fn constructor_produces_flags_get_reg( ctx: &mut C, @@ -1125,7 +1107,7 @@ pub fn constructor_produces_flags_get_reg( result: pattern1_1, } = pattern0_0 { - // Rule at src/prelude.isle line 454. + // Rule at src/prelude.isle line 445. return Some(pattern1_1); } return None; @@ -1142,7 +1124,7 @@ pub fn constructor_produces_flags_ignore( inst: ref pattern1_0, result: pattern1_1, } => { - // Rule at src/prelude.isle line 459. + // Rule at src/prelude.isle line 450. let expr0_0 = ProducesFlags::ProducesFlagsSideEffect { inst: pattern1_0.clone(), }; @@ -1152,7 +1134,7 @@ pub fn constructor_produces_flags_ignore( inst: ref pattern1_0, result: pattern1_1, } => { - // Rule at src/prelude.isle line 461. + // Rule at src/prelude.isle line 452. let expr0_0 = ProducesFlags::ProducesFlagsSideEffect { inst: pattern1_0.clone(), }; @@ -1181,7 +1163,7 @@ pub fn constructor_consumes_flags_concat( result: pattern3_1, } = pattern2_0 { - // Rule at src/prelude.isle line 468. + // Rule at src/prelude.isle line 459. let expr0_0 = C::value_regs(ctx, pattern1_1, pattern3_1); let expr1_0 = ConsumesFlags::ConsumesFlagsTwiceReturnsValueRegs { inst1: pattern1_0.clone(), @@ -1211,7 +1193,7 @@ pub fn constructor_with_flags( inst: ref pattern3_0, result: pattern3_1, } => { - // Rule at src/prelude.isle line 493. + // Rule at src/prelude.isle line 484. let expr0_0 = C::emit(ctx, pattern1_0); let expr1_0 = C::emit(ctx, pattern3_0); let expr2_0 = C::value_reg(ctx, pattern3_1); @@ -1222,7 +1204,7 @@ pub fn constructor_with_flags( inst2: ref pattern3_1, result: pattern3_2, } => { - // Rule at src/prelude.isle line 499. + // Rule at src/prelude.isle line 490. 
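// `with_flags` above always emits the flags producer and its consumer(s)
// back to back; since NZCV is not modeled as a regalloc2 operand, keeping the
// pair adjacent is what guarantees that no allocator-inserted move can
// clobber the flags in between. In sketch form:
//
//     C::emit(ctx, &producer); // e.g. AddS: sets NZCV
//     C::emit(ctx, &consumer); // e.g. Adc: reads NZCV immediately after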
let expr0_0 = C::emit(ctx, pattern1_0); let expr1_0 = C::emit(ctx, pattern3_0); let expr2_0 = C::emit(ctx, pattern3_1); @@ -1235,7 +1217,7 @@ pub fn constructor_with_flags( inst4: ref pattern3_3, result: pattern3_4, } => { - // Rule at src/prelude.isle line 511. + // Rule at src/prelude.isle line 502. let expr0_0 = C::emit(ctx, pattern1_0); let expr1_0 = C::emit(ctx, pattern3_0); let expr2_0 = C::emit(ctx, pattern3_1); @@ -1256,7 +1238,7 @@ pub fn constructor_with_flags( result: pattern3_1, } = pattern2_0 { - // Rule at src/prelude.isle line 487. + // Rule at src/prelude.isle line 478. let expr0_0 = C::emit(ctx, pattern1_0); let expr1_0 = C::emit(ctx, pattern3_0); let expr2_0 = C::value_regs(ctx, pattern1_1, pattern3_1); @@ -1276,7 +1258,7 @@ pub fn constructor_with_flags_reg( ) -> Option { let pattern0_0 = arg0; let pattern1_0 = arg1; - // Rule at src/prelude.isle line 528. + // Rule at src/prelude.isle line 519. let expr0_0 = constructor_with_flags(ctx, pattern0_0, pattern1_0)?; let expr1_0: usize = 0; let expr2_0 = C::value_regs_get(ctx, expr0_0, expr1_0); @@ -1287,12 +1269,12 @@ pub fn constructor_with_flags_reg( pub fn constructor_operand_size(ctx: &mut C, arg0: Type) -> Option { let pattern0_0 = arg0; if let Some(pattern1_0) = C::fits_in_32(ctx, pattern0_0) { - // Rule at src/isa/aarch64/inst.isle line 895. + // Rule at src/isa/aarch64/inst.isle line 894. let expr0_0 = OperandSize::Size32; return Some(expr0_0); } if let Some(pattern1_0) = C::fits_in_64(ctx, pattern0_0) { - // Rule at src/isa/aarch64/inst.isle line 896. + // Rule at src/isa/aarch64/inst.isle line 895. let expr0_0 = OperandSize::Size64; return Some(expr0_0); } @@ -1305,28 +1287,28 @@ pub fn constructor_vector_size(ctx: &mut C, arg0: Type) -> Option( ) -> Option { let pattern0_0 = arg0; let pattern1_0 = arg1; - // Rule at src/isa/aarch64/inst.isle line 1381. + // Rule at src/isa/aarch64/inst.isle line 1380. let expr0_0: Type = I64; let expr1_0 = C::temp_writable_reg(ctx, expr0_0); let expr2_0 = MInst::MovZ { @@ -1364,7 +1346,7 @@ pub fn constructor_movn( ) -> Option { let pattern0_0 = arg0; let pattern1_0 = arg1; - // Rule at src/isa/aarch64/inst.isle line 1388. + // Rule at src/isa/aarch64/inst.isle line 1387. let expr0_0: Type = I64; let expr1_0 = C::temp_writable_reg(ctx, expr0_0); let expr2_0 = MInst::MovN { @@ -1389,7 +1371,7 @@ pub fn constructor_alu_rr_imm_logic( let pattern1_0 = arg1; let pattern2_0 = arg2; let pattern3_0 = arg3; - // Rule at src/isa/aarch64/inst.isle line 1395. + // Rule at src/isa/aarch64/inst.isle line 1394. let expr0_0: Type = I64; let expr1_0 = C::temp_writable_reg(ctx, expr0_0); let expr2_0 = constructor_operand_size(ctx, pattern1_0)?; @@ -1417,7 +1399,7 @@ pub fn constructor_alu_rr_imm_shift( let pattern1_0 = arg1; let pattern2_0 = arg2; let pattern3_0 = arg3; - // Rule at src/isa/aarch64/inst.isle line 1402. + // Rule at src/isa/aarch64/inst.isle line 1401. let expr0_0: Type = I64; let expr1_0 = C::temp_writable_reg(ctx, expr0_0); let expr2_0 = constructor_operand_size(ctx, pattern1_0)?; @@ -1445,7 +1427,7 @@ pub fn constructor_alu_rrr( let pattern1_0 = arg1; let pattern2_0 = arg2; let pattern3_0 = arg3; - // Rule at src/isa/aarch64/inst.isle line 1409. + // Rule at src/isa/aarch64/inst.isle line 1408. 
let expr0_0: Type = I64; let expr1_0 = C::temp_writable_reg(ctx, expr0_0); let expr2_0 = constructor_operand_size(ctx, pattern1_0)?; @@ -1473,7 +1455,7 @@ pub fn constructor_vec_rrr( let pattern1_0 = arg1; let pattern2_0 = arg2; let pattern3_0 = arg3; - // Rule at src/isa/aarch64/inst.isle line 1416. + // Rule at src/isa/aarch64/inst.isle line 1415. let expr0_0: Type = I8X16; let expr1_0 = C::temp_writable_reg(ctx, expr0_0); let expr2_0 = MInst::VecRRR { @@ -1498,7 +1480,7 @@ pub fn constructor_vec_lanes( let pattern0_0 = arg0; let pattern1_0 = arg1; let pattern2_0 = arg2; - // Rule at src/isa/aarch64/inst.isle line 1423. + // Rule at src/isa/aarch64/inst.isle line 1422. let expr0_0: Type = I8X16; let expr1_0 = C::temp_writable_reg(ctx, expr0_0); let expr2_0 = MInst::VecLanes { @@ -1516,7 +1498,7 @@ pub fn constructor_vec_lanes( pub fn constructor_vec_dup(ctx: &mut C, arg0: Reg, arg1: &VectorSize) -> Option { let pattern0_0 = arg0; let pattern1_0 = arg1; - // Rule at src/isa/aarch64/inst.isle line 1430. + // Rule at src/isa/aarch64/inst.isle line 1429. let expr0_0: Type = I8X16; let expr1_0 = C::temp_writable_reg(ctx, expr0_0); let expr2_0 = MInst::VecDup { @@ -1541,7 +1523,7 @@ pub fn constructor_alu_rr_imm12( let pattern1_0 = arg1; let pattern2_0 = arg2; let pattern3_0 = arg3; - // Rule at src/isa/aarch64/inst.isle line 1437. + // Rule at src/isa/aarch64/inst.isle line 1436. let expr0_0: Type = I64; let expr1_0 = C::temp_writable_reg(ctx, expr0_0); let expr2_0 = constructor_operand_size(ctx, pattern1_0)?; @@ -1571,7 +1553,7 @@ pub fn constructor_alu_rrr_shift( let pattern2_0 = arg2; let pattern3_0 = arg3; let pattern4_0 = arg4; - // Rule at src/isa/aarch64/inst.isle line 1444. + // Rule at src/isa/aarch64/inst.isle line 1443. let expr0_0: Type = I64; let expr1_0 = C::temp_writable_reg(ctx, expr0_0); let expr2_0 = constructor_operand_size(ctx, pattern1_0)?; @@ -1602,7 +1584,7 @@ pub fn constructor_alu_rrr_extend( let pattern2_0 = arg2; let pattern3_0 = arg3; let pattern4_0 = arg4; - // Rule at src/isa/aarch64/inst.isle line 1451. + // Rule at src/isa/aarch64/inst.isle line 1450. let expr0_0: Type = I64; let expr1_0 = C::temp_writable_reg(ctx, expr0_0); let expr2_0 = constructor_operand_size(ctx, pattern1_0)?; @@ -1631,7 +1613,7 @@ pub fn constructor_alu_rr_extend_reg( let pattern1_0 = arg1; let pattern2_0 = arg2; let pattern3_0 = arg3; - // Rule at src/isa/aarch64/inst.isle line 1459. + // Rule at src/isa/aarch64/inst.isle line 1458. let expr0_0 = C::put_extended_in_reg(ctx, pattern3_0); let expr1_0 = C::get_extended_op(ctx, pattern3_0); let expr2_0 = @@ -1651,7 +1633,7 @@ pub fn constructor_alu_rrrr( let pattern1_0 = arg1; let pattern2_0 = arg2; let pattern3_0 = arg3; - // Rule at src/isa/aarch64/inst.isle line 1466. + // Rule at src/isa/aarch64/inst.isle line 1465. let expr0_0: Type = I64; let expr1_0 = C::temp_writable_reg(ctx, expr0_0); let expr2_0 = MInst::AluRRRR { @@ -1676,7 +1658,7 @@ pub fn constructor_bit_rr( let pattern0_0 = arg0; let pattern1_0 = arg1; let pattern2_0 = arg2; - // Rule at src/isa/aarch64/inst.isle line 1473. + // Rule at src/isa/aarch64/inst.isle line 1472. let expr0_0: Type = I64; let expr1_0 = C::temp_writable_reg(ctx, expr0_0); let expr2_0 = constructor_operand_size(ctx, pattern1_0)?; @@ -1701,7 +1683,7 @@ pub fn constructor_add_with_flags_paired( let pattern0_0 = arg0; let pattern1_0 = arg1; let pattern2_0 = arg2; - // Rule at src/isa/aarch64/inst.isle line 1480. + // Rule at src/isa/aarch64/inst.isle line 1479. 
let expr0_0: Type = I64; let expr1_0 = C::temp_writable_reg(ctx, expr0_0); let expr2_0 = ALUOp::AddS; @@ -1731,7 +1713,7 @@ pub fn constructor_adc_paired( let pattern0_0 = arg0; let pattern1_0 = arg1; let pattern2_0 = arg2; - // Rule at src/isa/aarch64/inst.isle line 1488. + // Rule at src/isa/aarch64/inst.isle line 1487. let expr0_0: Type = I64; let expr1_0 = C::temp_writable_reg(ctx, expr0_0); let expr2_0 = ALUOp::Adc; @@ -1761,7 +1743,7 @@ pub fn constructor_sub_with_flags_paired( let pattern0_0 = arg0; let pattern1_0 = arg1; let pattern2_0 = arg2; - // Rule at src/isa/aarch64/inst.isle line 1496. + // Rule at src/isa/aarch64/inst.isle line 1495. let expr0_0: Type = I64; let expr1_0 = C::temp_writable_reg(ctx, expr0_0); let expr2_0 = ALUOp::SubS; @@ -1789,7 +1771,7 @@ pub fn constructor_cmp64_imm( ) -> Option { let pattern0_0 = arg0; let pattern1_0 = arg1; - // Rule at src/isa/aarch64/inst.isle line 1503. + // Rule at src/isa/aarch64/inst.isle line 1502. let expr0_0 = ALUOp::SubS; let expr1_0 = OperandSize::Size64; let expr2_0 = C::writable_zero_reg(ctx); @@ -1814,7 +1796,7 @@ pub fn constructor_sbc_paired( let pattern0_0 = arg0; let pattern1_0 = arg1; let pattern2_0 = arg2; - // Rule at src/isa/aarch64/inst.isle line 1510. + // Rule at src/isa/aarch64/inst.isle line 1509. let expr0_0: Type = I64; let expr1_0 = C::temp_writable_reg(ctx, expr0_0); let expr2_0 = ALUOp::Sbc; @@ -1844,7 +1826,7 @@ pub fn constructor_vec_misc( let pattern0_0 = arg0; let pattern1_0 = arg1; let pattern2_0 = arg2; - // Rule at src/isa/aarch64/inst.isle line 1518. + // Rule at src/isa/aarch64/inst.isle line 1517. let expr0_0: Type = I8X16; let expr1_0 = C::temp_writable_reg(ctx, expr0_0); let expr2_0 = MInst::VecMisc { @@ -1870,7 +1852,7 @@ pub fn constructor_vec_rrr_long( let pattern1_0 = arg1; let pattern2_0 = arg2; let pattern3_0 = arg3; - // Rule at src/isa/aarch64/inst.isle line 1525. + // Rule at src/isa/aarch64/inst.isle line 1524. let expr0_0: Type = I8X16; let expr1_0 = C::temp_writable_reg(ctx, expr0_0); let expr2_0 = MInst::VecRRRLong { @@ -1899,7 +1881,7 @@ pub fn constructor_vec_rrrr_long( let pattern2_0 = arg2; let pattern3_0 = arg3; let pattern4_0 = arg4; - // Rule at src/isa/aarch64/inst.isle line 1535. + // Rule at src/isa/aarch64/inst.isle line 1534. let expr0_0: Type = I8X16; let expr1_0 = C::temp_writable_reg(ctx, expr0_0); let expr2_0 = MInst::FpuMove128 { @@ -1929,7 +1911,7 @@ pub fn constructor_vec_rr_narrow( let pattern0_0 = arg0; let pattern1_0 = arg1; let pattern2_0 = arg2; - // Rule at src/isa/aarch64/inst.isle line 1543. + // Rule at src/isa/aarch64/inst.isle line 1542. let expr0_0: Type = I8X16; let expr1_0 = C::temp_writable_reg(ctx, expr0_0); let expr2_0 = MInst::VecRRNarrow { @@ -1953,7 +1935,7 @@ pub fn constructor_vec_rr_long( let pattern0_0 = arg0; let pattern1_0 = arg1; let pattern2_0 = arg2; - // Rule at src/isa/aarch64/inst.isle line 1550. + // Rule at src/isa/aarch64/inst.isle line 1549. let expr0_0: Type = I8X16; let expr1_0 = C::temp_writable_reg(ctx, expr0_0); let expr2_0 = MInst::VecRRLong { @@ -1975,7 +1957,7 @@ pub fn constructor_mov_to_fpu( ) -> Option { let pattern0_0 = arg0; let pattern1_0 = arg1; - // Rule at src/isa/aarch64/inst.isle line 1557. + // Rule at src/isa/aarch64/inst.isle line 1556. 
let expr0_0: Type = I8X16; let expr1_0 = C::temp_writable_reg(ctx, expr0_0); let expr2_0 = MInst::MovToFpu { @@ -2000,7 +1982,7 @@ pub fn constructor_mov_to_vec( let pattern1_0 = arg1; let pattern2_0 = arg2; let pattern3_0 = arg3; - // Rule at src/isa/aarch64/inst.isle line 1564. + // Rule at src/isa/aarch64/inst.isle line 1563. let expr0_0: Type = I8X16; let expr1_0 = C::temp_writable_reg(ctx, expr0_0); let expr2_0 = MInst::FpuMove128 { @@ -2029,7 +2011,7 @@ pub fn constructor_mov_from_vec( let pattern0_0 = arg0; let pattern1_0 = arg1; let pattern2_0 = arg2; - // Rule at src/isa/aarch64/inst.isle line 1572. + // Rule at src/isa/aarch64/inst.isle line 1571. let expr0_0: Type = I64; let expr1_0 = C::temp_writable_reg(ctx, expr0_0); let expr2_0 = MInst::MovFromVec { @@ -2055,7 +2037,7 @@ pub fn constructor_mov_from_vec_signed( let pattern1_0 = arg1; let pattern2_0 = arg2; let pattern3_0 = arg3; - // Rule at src/isa/aarch64/inst.isle line 1579. + // Rule at src/isa/aarch64/inst.isle line 1578. let expr0_0: Type = I64; let expr1_0 = C::temp_writable_reg(ctx, expr0_0); let expr2_0 = MInst::MovFromVecSigned { @@ -2082,7 +2064,7 @@ pub fn constructor_extend( let pattern1_0 = arg1; let pattern2_0 = arg2; let pattern3_0 = arg3; - // Rule at src/isa/aarch64/inst.isle line 1586. + // Rule at src/isa/aarch64/inst.isle line 1585. let expr0_0: Type = I64; let expr1_0 = C::temp_writable_reg(ctx, expr0_0); let expr2_0 = MInst::Extend { @@ -2101,7 +2083,7 @@ pub fn constructor_extend( pub fn constructor_load_acquire(ctx: &mut C, arg0: Type, arg1: Reg) -> Option { let pattern0_0 = arg0; let pattern1_0 = arg1; - // Rule at src/isa/aarch64/inst.isle line 1593. + // Rule at src/isa/aarch64/inst.isle line 1592. let expr0_0: Type = I64; let expr1_0 = C::temp_writable_reg(ctx, expr0_0); let expr2_0 = MInst::LoadAcquire { @@ -2124,7 +2106,7 @@ pub fn constructor_tst_imm( let pattern0_0 = arg0; let pattern1_0 = arg1; let pattern2_0 = arg2; - // Rule at src/isa/aarch64/inst.isle line 1603. + // Rule at src/isa/aarch64/inst.isle line 1602. let expr0_0 = ALUOp::AndS; let expr1_0 = constructor_operand_size(ctx, pattern0_0)?; let expr2_0 = C::writable_zero_reg(ctx); @@ -2149,7 +2131,7 @@ pub fn constructor_csel( let pattern0_0 = arg0; let pattern1_0 = arg1; let pattern2_0 = arg2; - // Rule at src/isa/aarch64/inst.isle line 1617. + // Rule at src/isa/aarch64/inst.isle line 1616. let expr0_0: Type = I64; let expr1_0 = C::temp_writable_reg(ctx, expr0_0); let expr2_0 = MInst::CSel { @@ -2171,7 +2153,7 @@ pub fn constructor_add(ctx: &mut C, arg0: Type, arg1: Reg, arg2: Reg let pattern0_0 = arg0; let pattern1_0 = arg1; let pattern2_0 = arg2; - // Rule at src/isa/aarch64/inst.isle line 1626. + // Rule at src/isa/aarch64/inst.isle line 1625. let expr0_0 = ALUOp::Add; let expr1_0 = constructor_alu_rrr(ctx, &expr0_0, pattern0_0, pattern1_0, pattern2_0)?; return Some(expr1_0); @@ -2187,7 +2169,7 @@ pub fn constructor_add_imm( let pattern0_0 = arg0; let pattern1_0 = arg1; let pattern2_0 = arg2; - // Rule at src/isa/aarch64/inst.isle line 1629. + // Rule at src/isa/aarch64/inst.isle line 1628. let expr0_0 = ALUOp::Add; let expr1_0 = constructor_alu_rr_imm12(ctx, &expr0_0, pattern0_0, pattern1_0, pattern2_0)?; return Some(expr1_0); @@ -2203,7 +2185,7 @@ pub fn constructor_add_extend( let pattern0_0 = arg0; let pattern1_0 = arg1; let pattern2_0 = arg2; - // Rule at src/isa/aarch64/inst.isle line 1632. + // Rule at src/isa/aarch64/inst.isle line 1631. 
let expr0_0 = ALUOp::Add; let expr1_0 = constructor_alu_rr_extend_reg(ctx, &expr0_0, pattern0_0, pattern1_0, pattern2_0)?; return Some(expr1_0); @@ -2221,7 +2203,7 @@ pub fn constructor_add_shift( let pattern1_0 = arg1; let pattern2_0 = arg2; let pattern3_0 = arg3; - // Rule at src/isa/aarch64/inst.isle line 1635. + // Rule at src/isa/aarch64/inst.isle line 1634. let expr0_0 = ALUOp::Add; let expr1_0 = constructor_alu_rrr_shift( ctx, &expr0_0, pattern0_0, pattern1_0, pattern2_0, pattern3_0, @@ -2239,7 +2221,7 @@ pub fn constructor_add_vec( let pattern0_0 = arg0; let pattern1_0 = arg1; let pattern2_0 = arg2; - // Rule at src/isa/aarch64/inst.isle line 1638. + // Rule at src/isa/aarch64/inst.isle line 1637. let expr0_0 = VecALUOp::Add; let expr1_0 = constructor_vec_rrr(ctx, &expr0_0, pattern0_0, pattern1_0, pattern2_0)?; return Some(expr1_0); @@ -2250,7 +2232,7 @@ pub fn constructor_sub(ctx: &mut C, arg0: Type, arg1: Reg, arg2: Reg let pattern0_0 = arg0; let pattern1_0 = arg1; let pattern2_0 = arg2; - // Rule at src/isa/aarch64/inst.isle line 1643. + // Rule at src/isa/aarch64/inst.isle line 1642. let expr0_0 = ALUOp::Sub; let expr1_0 = constructor_alu_rrr(ctx, &expr0_0, pattern0_0, pattern1_0, pattern2_0)?; return Some(expr1_0); @@ -2266,7 +2248,7 @@ pub fn constructor_sub_imm( let pattern0_0 = arg0; let pattern1_0 = arg1; let pattern2_0 = arg2; - // Rule at src/isa/aarch64/inst.isle line 1646. + // Rule at src/isa/aarch64/inst.isle line 1645. let expr0_0 = ALUOp::Sub; let expr1_0 = constructor_alu_rr_imm12(ctx, &expr0_0, pattern0_0, pattern1_0, pattern2_0)?; return Some(expr1_0); @@ -2282,7 +2264,7 @@ pub fn constructor_sub_extend( let pattern0_0 = arg0; let pattern1_0 = arg1; let pattern2_0 = arg2; - // Rule at src/isa/aarch64/inst.isle line 1649. + // Rule at src/isa/aarch64/inst.isle line 1648. let expr0_0 = ALUOp::Sub; let expr1_0 = constructor_alu_rr_extend_reg(ctx, &expr0_0, pattern0_0, pattern1_0, pattern2_0)?; return Some(expr1_0); @@ -2300,7 +2282,7 @@ pub fn constructor_sub_shift( let pattern1_0 = arg1; let pattern2_0 = arg2; let pattern3_0 = arg3; - // Rule at src/isa/aarch64/inst.isle line 1652. + // Rule at src/isa/aarch64/inst.isle line 1651. let expr0_0 = ALUOp::Sub; let expr1_0 = constructor_alu_rrr_shift( ctx, &expr0_0, pattern0_0, pattern1_0, pattern2_0, pattern3_0, @@ -2318,7 +2300,7 @@ pub fn constructor_sub_vec( let pattern0_0 = arg0; let pattern1_0 = arg1; let pattern2_0 = arg2; - // Rule at src/isa/aarch64/inst.isle line 1655. + // Rule at src/isa/aarch64/inst.isle line 1654. let expr0_0 = VecALUOp::Sub; let expr1_0 = constructor_vec_rrr(ctx, &expr0_0, pattern0_0, pattern1_0, pattern2_0)?; return Some(expr1_0); @@ -2337,7 +2319,7 @@ pub fn constructor_madd( let pattern2_0 = arg1; let pattern3_0 = arg2; let pattern4_0 = arg3; - // Rule at src/isa/aarch64/inst.isle line 1661. + // Rule at src/isa/aarch64/inst.isle line 1660. let expr0_0 = constructor_madd64(ctx, pattern2_0, pattern3_0, pattern4_0)?; return Some(expr0_0); } @@ -2345,7 +2327,7 @@ pub fn constructor_madd( let pattern2_0 = arg1; let pattern3_0 = arg2; let pattern4_0 = arg3; - // Rule at src/isa/aarch64/inst.isle line 1660. + // Rule at src/isa/aarch64/inst.isle line 1659. let expr0_0 = constructor_madd32(ctx, pattern2_0, pattern3_0, pattern4_0)?; return Some(expr0_0); } @@ -2357,7 +2339,7 @@ pub fn constructor_madd32(ctx: &mut C, arg0: Reg, arg1: Reg, arg2: R let pattern0_0 = arg0; let pattern1_0 = arg1; let pattern2_0 = arg2; - // Rule at src/isa/aarch64/inst.isle line 1664. 
+ // Rule at src/isa/aarch64/inst.isle line 1663. let expr0_0 = ALUOp3::MAdd32; let expr1_0 = constructor_alu_rrrr(ctx, &expr0_0, pattern0_0, pattern1_0, pattern2_0)?; return Some(expr1_0); @@ -2368,7 +2350,7 @@ pub fn constructor_madd64(ctx: &mut C, arg0: Reg, arg1: Reg, arg2: R let pattern0_0 = arg0; let pattern1_0 = arg1; let pattern2_0 = arg2; - // Rule at src/isa/aarch64/inst.isle line 1667. + // Rule at src/isa/aarch64/inst.isle line 1666. let expr0_0 = ALUOp3::MAdd64; let expr1_0 = constructor_alu_rrrr(ctx, &expr0_0, pattern0_0, pattern1_0, pattern2_0)?; return Some(expr1_0); @@ -2379,7 +2361,7 @@ pub fn constructor_msub64(ctx: &mut C, arg0: Reg, arg1: Reg, arg2: R let pattern0_0 = arg0; let pattern1_0 = arg1; let pattern2_0 = arg2; - // Rule at src/isa/aarch64/inst.isle line 1672. + // Rule at src/isa/aarch64/inst.isle line 1671. let expr0_0 = ALUOp3::MSub64; let expr1_0 = constructor_alu_rrrr(ctx, &expr0_0, pattern0_0, pattern1_0, pattern2_0)?; return Some(expr1_0); @@ -2395,7 +2377,7 @@ pub fn constructor_uqadd( let pattern0_0 = arg0; let pattern1_0 = arg1; let pattern2_0 = arg2; - // Rule at src/isa/aarch64/inst.isle line 1676. + // Rule at src/isa/aarch64/inst.isle line 1675. let expr0_0 = VecALUOp::Uqadd; let expr1_0 = constructor_vec_rrr(ctx, &expr0_0, pattern0_0, pattern1_0, pattern2_0)?; return Some(expr1_0); @@ -2411,7 +2393,7 @@ pub fn constructor_sqadd( let pattern0_0 = arg0; let pattern1_0 = arg1; let pattern2_0 = arg2; - // Rule at src/isa/aarch64/inst.isle line 1680. + // Rule at src/isa/aarch64/inst.isle line 1679. let expr0_0 = VecALUOp::Sqadd; let expr1_0 = constructor_vec_rrr(ctx, &expr0_0, pattern0_0, pattern1_0, pattern2_0)?; return Some(expr1_0); @@ -2427,7 +2409,7 @@ pub fn constructor_uqsub( let pattern0_0 = arg0; let pattern1_0 = arg1; let pattern2_0 = arg2; - // Rule at src/isa/aarch64/inst.isle line 1684. + // Rule at src/isa/aarch64/inst.isle line 1683. let expr0_0 = VecALUOp::Uqsub; let expr1_0 = constructor_vec_rrr(ctx, &expr0_0, pattern0_0, pattern1_0, pattern2_0)?; return Some(expr1_0); @@ -2443,7 +2425,7 @@ pub fn constructor_sqsub( let pattern0_0 = arg0; let pattern1_0 = arg1; let pattern2_0 = arg2; - // Rule at src/isa/aarch64/inst.isle line 1688. + // Rule at src/isa/aarch64/inst.isle line 1687. let expr0_0 = VecALUOp::Sqsub; let expr1_0 = constructor_vec_rrr(ctx, &expr0_0, pattern0_0, pattern1_0, pattern2_0)?; return Some(expr1_0); @@ -2454,7 +2436,7 @@ pub fn constructor_umulh(ctx: &mut C, arg0: Type, arg1: Reg, arg2: R let pattern0_0 = arg0; let pattern1_0 = arg1; let pattern2_0 = arg2; - // Rule at src/isa/aarch64/inst.isle line 1692. + // Rule at src/isa/aarch64/inst.isle line 1691. let expr0_0 = ALUOp::UMulH; let expr1_0 = constructor_alu_rrr(ctx, &expr0_0, pattern0_0, pattern1_0, pattern2_0)?; return Some(expr1_0); @@ -2465,7 +2447,7 @@ pub fn constructor_smulh(ctx: &mut C, arg0: Type, arg1: Reg, arg2: R let pattern0_0 = arg0; let pattern1_0 = arg1; let pattern2_0 = arg2; - // Rule at src/isa/aarch64/inst.isle line 1696. + // Rule at src/isa/aarch64/inst.isle line 1695. let expr0_0 = ALUOp::SMulH; let expr1_0 = constructor_alu_rrr(ctx, &expr0_0, pattern0_0, pattern1_0, pattern2_0)?; return Some(expr1_0); @@ -2481,7 +2463,7 @@ pub fn constructor_mul( let pattern0_0 = arg0; let pattern1_0 = arg1; let pattern2_0 = arg2; - // Rule at src/isa/aarch64/inst.isle line 1700. + // Rule at src/isa/aarch64/inst.isle line 1699. 
let expr0_0 = VecALUOp::Mul; let expr1_0 = constructor_vec_rrr(ctx, &expr0_0, pattern0_0, pattern1_0, pattern2_0)?; return Some(expr1_0); @@ -2491,7 +2473,7 @@ pub fn constructor_mul( pub fn constructor_neg(ctx: &mut C, arg0: Reg, arg1: &VectorSize) -> Option { let pattern0_0 = arg0; let pattern1_0 = arg1; - // Rule at src/isa/aarch64/inst.isle line 1704. + // Rule at src/isa/aarch64/inst.isle line 1703. let expr0_0 = VecMisc2::Neg; let expr1_0 = constructor_vec_misc(ctx, &expr0_0, pattern0_0, pattern1_0)?; return Some(expr1_0); @@ -2501,7 +2483,7 @@ pub fn constructor_neg(ctx: &mut C, arg0: Reg, arg1: &VectorSize) -> pub fn constructor_rev64(ctx: &mut C, arg0: Reg, arg1: &VectorSize) -> Option { let pattern0_0 = arg0; let pattern1_0 = arg1; - // Rule at src/isa/aarch64/inst.isle line 1708. + // Rule at src/isa/aarch64/inst.isle line 1707. let expr0_0 = VecMisc2::Rev64; let expr1_0 = constructor_vec_misc(ctx, &expr0_0, pattern0_0, pattern1_0)?; return Some(expr1_0); @@ -2511,7 +2493,7 @@ pub fn constructor_rev64(ctx: &mut C, arg0: Reg, arg1: &VectorSize) pub fn constructor_xtn64(ctx: &mut C, arg0: Reg, arg1: bool) -> Option { let pattern0_0 = arg0; let pattern1_0 = arg1; - // Rule at src/isa/aarch64/inst.isle line 1712. + // Rule at src/isa/aarch64/inst.isle line 1711. let expr0_0 = VecRRNarrowOp::Xtn64; let expr1_0 = constructor_vec_rr_narrow(ctx, &expr0_0, pattern0_0, pattern1_0)?; return Some(expr1_0); @@ -2527,7 +2509,7 @@ pub fn constructor_addp( let pattern0_0 = arg0; let pattern1_0 = arg1; let pattern2_0 = arg2; - // Rule at src/isa/aarch64/inst.isle line 1716. + // Rule at src/isa/aarch64/inst.isle line 1715. let expr0_0 = VecALUOp::Addp; let expr1_0 = constructor_vec_rrr(ctx, &expr0_0, pattern0_0, pattern1_0, pattern2_0)?; return Some(expr1_0); @@ -2537,7 +2519,7 @@ pub fn constructor_addp( pub fn constructor_addv(ctx: &mut C, arg0: Reg, arg1: &VectorSize) -> Option { let pattern0_0 = arg0; let pattern1_0 = arg1; - // Rule at src/isa/aarch64/inst.isle line 1720. + // Rule at src/isa/aarch64/inst.isle line 1719. let expr0_0 = VecLanesOp::Addv; let expr1_0 = constructor_vec_lanes(ctx, &expr0_0, pattern0_0, pattern1_0)?; return Some(expr1_0); @@ -2547,7 +2529,7 @@ pub fn constructor_addv(ctx: &mut C, arg0: Reg, arg1: &VectorSize) - pub fn constructor_shll32(ctx: &mut C, arg0: Reg, arg1: bool) -> Option { let pattern0_0 = arg0; let pattern1_0 = arg1; - // Rule at src/isa/aarch64/inst.isle line 1724. + // Rule at src/isa/aarch64/inst.isle line 1723. let expr0_0 = VecRRLongOp::Shll32; let expr1_0 = constructor_vec_rr_long(ctx, &expr0_0, pattern0_0, pattern1_0)?; return Some(expr1_0); @@ -2565,7 +2547,7 @@ pub fn constructor_umlal32( let pattern1_0 = arg1; let pattern2_0 = arg2; let pattern3_0 = arg3; - // Rule at src/isa/aarch64/inst.isle line 1728. + // Rule at src/isa/aarch64/inst.isle line 1727. let expr0_0 = VecRRRLongOp::Umlal32; let expr1_0 = constructor_vec_rrrr_long( ctx, &expr0_0, pattern0_0, pattern1_0, pattern2_0, pattern3_0, @@ -2583,7 +2565,7 @@ pub fn constructor_smull8( let pattern0_0 = arg0; let pattern1_0 = arg1; let pattern2_0 = arg2; - // Rule at src/isa/aarch64/inst.isle line 1732. + // Rule at src/isa/aarch64/inst.isle line 1731. let expr0_0 = VecRRRLongOp::Smull8; let expr1_0 = constructor_vec_rrr_long(ctx, &expr0_0, pattern0_0, pattern1_0, pattern2_0)?; return Some(expr1_0); @@ -2599,7 +2581,7 @@ pub fn constructor_umull8( let pattern0_0 = arg0; let pattern1_0 = arg1; let pattern2_0 = arg2; - // Rule at src/isa/aarch64/inst.isle line 1736. 
+ // Rule at src/isa/aarch64/inst.isle line 1735. let expr0_0 = VecRRRLongOp::Umull8; let expr1_0 = constructor_vec_rrr_long(ctx, &expr0_0, pattern0_0, pattern1_0, pattern2_0)?; return Some(expr1_0); @@ -2615,7 +2597,7 @@ pub fn constructor_smull16( let pattern0_0 = arg0; let pattern1_0 = arg1; let pattern2_0 = arg2; - // Rule at src/isa/aarch64/inst.isle line 1740. + // Rule at src/isa/aarch64/inst.isle line 1739. let expr0_0 = VecRRRLongOp::Smull16; let expr1_0 = constructor_vec_rrr_long(ctx, &expr0_0, pattern0_0, pattern1_0, pattern2_0)?; return Some(expr1_0); @@ -2631,7 +2613,7 @@ pub fn constructor_umull16( let pattern0_0 = arg0; let pattern1_0 = arg1; let pattern2_0 = arg2; - // Rule at src/isa/aarch64/inst.isle line 1744. + // Rule at src/isa/aarch64/inst.isle line 1743. let expr0_0 = VecRRRLongOp::Umull16; let expr1_0 = constructor_vec_rrr_long(ctx, &expr0_0, pattern0_0, pattern1_0, pattern2_0)?; return Some(expr1_0); @@ -2647,7 +2629,7 @@ pub fn constructor_smull32( let pattern0_0 = arg0; let pattern1_0 = arg1; let pattern2_0 = arg2; - // Rule at src/isa/aarch64/inst.isle line 1748. + // Rule at src/isa/aarch64/inst.isle line 1747. let expr0_0 = VecRRRLongOp::Smull32; let expr1_0 = constructor_vec_rrr_long(ctx, &expr0_0, pattern0_0, pattern1_0, pattern2_0)?; return Some(expr1_0); @@ -2663,7 +2645,7 @@ pub fn constructor_umull32( let pattern0_0 = arg0; let pattern1_0 = arg1; let pattern2_0 = arg2; - // Rule at src/isa/aarch64/inst.isle line 1752. + // Rule at src/isa/aarch64/inst.isle line 1751. let expr0_0 = VecRRRLongOp::Umull32; let expr1_0 = constructor_vec_rrr_long(ctx, &expr0_0, pattern0_0, pattern1_0, pattern2_0)?; return Some(expr1_0); @@ -2674,7 +2656,7 @@ pub fn constructor_asr(ctx: &mut C, arg0: Type, arg1: Reg, arg2: Reg let pattern0_0 = arg0; let pattern1_0 = arg1; let pattern2_0 = arg2; - // Rule at src/isa/aarch64/inst.isle line 1756. + // Rule at src/isa/aarch64/inst.isle line 1755. let expr0_0 = ALUOp::Asr; let expr1_0 = constructor_alu_rrr(ctx, &expr0_0, pattern0_0, pattern1_0, pattern2_0)?; return Some(expr1_0); @@ -2690,7 +2672,7 @@ pub fn constructor_asr_imm( let pattern0_0 = arg0; let pattern1_0 = arg1; let pattern2_0 = arg2; - // Rule at src/isa/aarch64/inst.isle line 1759. + // Rule at src/isa/aarch64/inst.isle line 1758. let expr0_0 = ALUOp::Asr; let expr1_0 = constructor_alu_rr_imm_shift(ctx, &expr0_0, pattern0_0, pattern1_0, pattern2_0)?; return Some(expr1_0); @@ -2701,7 +2683,7 @@ pub fn constructor_lsr(ctx: &mut C, arg0: Type, arg1: Reg, arg2: Reg let pattern0_0 = arg0; let pattern1_0 = arg1; let pattern2_0 = arg2; - // Rule at src/isa/aarch64/inst.isle line 1763. + // Rule at src/isa/aarch64/inst.isle line 1762. let expr0_0 = ALUOp::Lsr; let expr1_0 = constructor_alu_rrr(ctx, &expr0_0, pattern0_0, pattern1_0, pattern2_0)?; return Some(expr1_0); @@ -2717,7 +2699,7 @@ pub fn constructor_lsr_imm( let pattern0_0 = arg0; let pattern1_0 = arg1; let pattern2_0 = arg2; - // Rule at src/isa/aarch64/inst.isle line 1766. + // Rule at src/isa/aarch64/inst.isle line 1765. let expr0_0 = ALUOp::Lsr; let expr1_0 = constructor_alu_rr_imm_shift(ctx, &expr0_0, pattern0_0, pattern1_0, pattern2_0)?; return Some(expr1_0); @@ -2728,7 +2710,7 @@ pub fn constructor_lsl(ctx: &mut C, arg0: Type, arg1: Reg, arg2: Reg let pattern0_0 = arg0; let pattern1_0 = arg1; let pattern2_0 = arg2; - // Rule at src/isa/aarch64/inst.isle line 1770. + // Rule at src/isa/aarch64/inst.isle line 1769. 
let expr0_0 = ALUOp::Lsl; let expr1_0 = constructor_alu_rrr(ctx, &expr0_0, pattern0_0, pattern1_0, pattern2_0)?; return Some(expr1_0); @@ -2744,7 +2726,7 @@ pub fn constructor_lsl_imm( let pattern0_0 = arg0; let pattern1_0 = arg1; let pattern2_0 = arg2; - // Rule at src/isa/aarch64/inst.isle line 1773. + // Rule at src/isa/aarch64/inst.isle line 1772. let expr0_0 = ALUOp::Lsl; let expr1_0 = constructor_alu_rr_imm_shift(ctx, &expr0_0, pattern0_0, pattern1_0, pattern2_0)?; return Some(expr1_0); @@ -2760,7 +2742,7 @@ pub fn constructor_a64_udiv( let pattern0_0 = arg0; let pattern1_0 = arg1; let pattern2_0 = arg2; - // Rule at src/isa/aarch64/inst.isle line 1777. + // Rule at src/isa/aarch64/inst.isle line 1776. let expr0_0 = ALUOp::UDiv; let expr1_0 = constructor_alu_rrr(ctx, &expr0_0, pattern0_0, pattern1_0, pattern2_0)?; return Some(expr1_0); @@ -2776,7 +2758,7 @@ pub fn constructor_a64_sdiv( let pattern0_0 = arg0; let pattern1_0 = arg1; let pattern2_0 = arg2; - // Rule at src/isa/aarch64/inst.isle line 1781. + // Rule at src/isa/aarch64/inst.isle line 1780. let expr0_0 = ALUOp::SDiv; let expr1_0 = constructor_alu_rrr(ctx, &expr0_0, pattern0_0, pattern1_0, pattern2_0)?; return Some(expr1_0); @@ -2786,7 +2768,7 @@ pub fn constructor_a64_sdiv( pub fn constructor_not(ctx: &mut C, arg0: Reg, arg1: &VectorSize) -> Option { let pattern0_0 = arg0; let pattern1_0 = arg1; - // Rule at src/isa/aarch64/inst.isle line 1785. + // Rule at src/isa/aarch64/inst.isle line 1784. let expr0_0 = VecMisc2::Not; let expr1_0 = constructor_vec_misc(ctx, &expr0_0, pattern0_0, pattern1_0)?; return Some(expr1_0); @@ -2802,7 +2784,7 @@ pub fn constructor_orr_not( let pattern0_0 = arg0; let pattern1_0 = arg1; let pattern2_0 = arg2; - // Rule at src/isa/aarch64/inst.isle line 1790. + // Rule at src/isa/aarch64/inst.isle line 1789. let expr0_0 = ALUOp::OrrNot; let expr1_0 = constructor_alu_rrr(ctx, &expr0_0, pattern0_0, pattern1_0, pattern2_0)?; return Some(expr1_0); @@ -2820,7 +2802,7 @@ pub fn constructor_orr_not_shift( let pattern1_0 = arg1; let pattern2_0 = arg2; let pattern3_0 = arg3; - // Rule at src/isa/aarch64/inst.isle line 1793. + // Rule at src/isa/aarch64/inst.isle line 1792. let expr0_0 = ALUOp::OrrNot; let expr1_0 = constructor_alu_rrr_shift( ctx, &expr0_0, pattern0_0, pattern1_0, pattern2_0, pattern3_0, @@ -2833,7 +2815,7 @@ pub fn constructor_orr(ctx: &mut C, arg0: Type, arg1: Reg, arg2: Reg let pattern0_0 = arg0; let pattern1_0 = arg1; let pattern2_0 = arg2; - // Rule at src/isa/aarch64/inst.isle line 1798. + // Rule at src/isa/aarch64/inst.isle line 1797. let expr0_0 = ALUOp::Orr; let expr1_0 = constructor_alu_rrr(ctx, &expr0_0, pattern0_0, pattern1_0, pattern2_0)?; return Some(expr1_0); @@ -2849,7 +2831,7 @@ pub fn constructor_orr_imm( let pattern0_0 = arg0; let pattern1_0 = arg1; let pattern2_0 = arg2; - // Rule at src/isa/aarch64/inst.isle line 1801. + // Rule at src/isa/aarch64/inst.isle line 1800. let expr0_0 = ALUOp::Orr; let expr1_0 = constructor_alu_rr_imm_logic(ctx, &expr0_0, pattern0_0, pattern1_0, pattern2_0)?; return Some(expr1_0); @@ -2865,7 +2847,7 @@ pub fn constructor_orr_vec( let pattern0_0 = arg0; let pattern1_0 = arg1; let pattern2_0 = arg2; - // Rule at src/isa/aarch64/inst.isle line 1804. + // Rule at src/isa/aarch64/inst.isle line 1803. 
let expr0_0 = VecALUOp::Orr; let expr1_0 = constructor_vec_rrr(ctx, &expr0_0, pattern0_0, pattern1_0, pattern2_0)?; return Some(expr1_0); @@ -2881,7 +2863,7 @@ pub fn constructor_and_imm( let pattern0_0 = arg0; let pattern1_0 = arg1; let pattern2_0 = arg2; - // Rule at src/isa/aarch64/inst.isle line 1809. + // Rule at src/isa/aarch64/inst.isle line 1808. let expr0_0 = ALUOp::And; let expr1_0 = constructor_alu_rr_imm_logic(ctx, &expr0_0, pattern0_0, pattern1_0, pattern2_0)?; return Some(expr1_0); @@ -2897,7 +2879,7 @@ pub fn constructor_and_vec( let pattern0_0 = arg0; let pattern1_0 = arg1; let pattern2_0 = arg2; - // Rule at src/isa/aarch64/inst.isle line 1812. + // Rule at src/isa/aarch64/inst.isle line 1811. let expr0_0 = VecALUOp::And; let expr1_0 = constructor_vec_rrr(ctx, &expr0_0, pattern0_0, pattern1_0, pattern2_0)?; return Some(expr1_0); @@ -2913,7 +2895,7 @@ pub fn constructor_eor_vec( let pattern0_0 = arg0; let pattern1_0 = arg1; let pattern2_0 = arg2; - // Rule at src/isa/aarch64/inst.isle line 1816. + // Rule at src/isa/aarch64/inst.isle line 1815. let expr0_0 = VecALUOp::Eor; let expr1_0 = constructor_vec_rrr(ctx, &expr0_0, pattern0_0, pattern1_0, pattern2_0)?; return Some(expr1_0); @@ -2929,7 +2911,7 @@ pub fn constructor_bic_vec( let pattern0_0 = arg0; let pattern1_0 = arg1; let pattern2_0 = arg2; - // Rule at src/isa/aarch64/inst.isle line 1820. + // Rule at src/isa/aarch64/inst.isle line 1819. let expr0_0 = VecALUOp::Bic; let expr1_0 = constructor_vec_rrr(ctx, &expr0_0, pattern0_0, pattern1_0, pattern2_0)?; return Some(expr1_0); @@ -2945,7 +2927,7 @@ pub fn constructor_sshl( let pattern0_0 = arg0; let pattern1_0 = arg1; let pattern2_0 = arg2; - // Rule at src/isa/aarch64/inst.isle line 1824. + // Rule at src/isa/aarch64/inst.isle line 1823. let expr0_0 = VecALUOp::Sshl; let expr1_0 = constructor_vec_rrr(ctx, &expr0_0, pattern0_0, pattern1_0, pattern2_0)?; return Some(expr1_0); @@ -2961,7 +2943,7 @@ pub fn constructor_ushl( let pattern0_0 = arg0; let pattern1_0 = arg1; let pattern2_0 = arg2; - // Rule at src/isa/aarch64/inst.isle line 1828. + // Rule at src/isa/aarch64/inst.isle line 1827. let expr0_0 = VecALUOp::Ushl; let expr1_0 = constructor_vec_rrr(ctx, &expr0_0, pattern0_0, pattern1_0, pattern2_0)?; return Some(expr1_0); @@ -2977,7 +2959,7 @@ pub fn constructor_a64_rotr( let pattern0_0 = arg0; let pattern1_0 = arg1; let pattern2_0 = arg2; - // Rule at src/isa/aarch64/inst.isle line 1833. + // Rule at src/isa/aarch64/inst.isle line 1832. let expr0_0 = ALUOp::RotR; let expr1_0 = constructor_alu_rrr(ctx, &expr0_0, pattern0_0, pattern1_0, pattern2_0)?; return Some(expr1_0); @@ -2993,7 +2975,7 @@ pub fn constructor_a64_rotr_imm( let pattern0_0 = arg0; let pattern1_0 = arg1; let pattern2_0 = arg2; - // Rule at src/isa/aarch64/inst.isle line 1836. + // Rule at src/isa/aarch64/inst.isle line 1835. let expr0_0 = ALUOp::RotR; let expr1_0 = constructor_alu_rr_imm_shift(ctx, &expr0_0, pattern0_0, pattern1_0, pattern2_0)?; return Some(expr1_0); @@ -3003,7 +2985,7 @@ pub fn constructor_a64_rotr_imm( pub fn constructor_rbit(ctx: &mut C, arg0: Type, arg1: Reg) -> Option { let pattern0_0 = arg0; let pattern1_0 = arg1; - // Rule at src/isa/aarch64/inst.isle line 1841. + // Rule at src/isa/aarch64/inst.isle line 1840. 
let expr0_0 = BitOp::RBit; let expr1_0 = constructor_bit_rr(ctx, &expr0_0, pattern0_0, pattern1_0)?; return Some(expr1_0); @@ -3013,7 +2995,7 @@ pub fn constructor_rbit(ctx: &mut C, arg0: Type, arg1: Reg) -> Optio pub fn constructor_a64_clz(ctx: &mut C, arg0: Type, arg1: Reg) -> Option { let pattern0_0 = arg0; let pattern1_0 = arg1; - // Rule at src/isa/aarch64/inst.isle line 1846. + // Rule at src/isa/aarch64/inst.isle line 1845. let expr0_0 = BitOp::Clz; let expr1_0 = constructor_bit_rr(ctx, &expr0_0, pattern0_0, pattern1_0)?; return Some(expr1_0); @@ -3023,7 +3005,7 @@ pub fn constructor_a64_clz(ctx: &mut C, arg0: Type, arg1: Reg) -> Op pub fn constructor_a64_cls(ctx: &mut C, arg0: Type, arg1: Reg) -> Option { let pattern0_0 = arg0; let pattern1_0 = arg1; - // Rule at src/isa/aarch64/inst.isle line 1851. + // Rule at src/isa/aarch64/inst.isle line 1850. let expr0_0 = BitOp::Cls; let expr1_0 = constructor_bit_rr(ctx, &expr0_0, pattern0_0, pattern1_0)?; return Some(expr1_0); @@ -3034,7 +3016,7 @@ pub fn constructor_eon(ctx: &mut C, arg0: Type, arg1: Reg, arg2: Reg let pattern0_0 = arg0; let pattern1_0 = arg1; let pattern2_0 = arg2; - // Rule at src/isa/aarch64/inst.isle line 1856. + // Rule at src/isa/aarch64/inst.isle line 1855. let expr0_0 = ALUOp::EorNot; let expr1_0 = constructor_alu_rrr(ctx, &expr0_0, pattern0_0, pattern1_0, pattern2_0)?; return Some(expr1_0); @@ -3044,7 +3026,7 @@ pub fn constructor_eon(ctx: &mut C, arg0: Type, arg1: Reg, arg2: Reg pub fn constructor_vec_cnt(ctx: &mut C, arg0: Reg, arg1: &VectorSize) -> Option { let pattern0_0 = arg0; let pattern1_0 = arg1; - // Rule at src/isa/aarch64/inst.isle line 1861. + // Rule at src/isa/aarch64/inst.isle line 1860. let expr0_0 = VecMisc2::Cnt; let expr1_0 = constructor_vec_misc(ctx, &expr0_0, pattern0_0, pattern1_0)?; return Some(expr1_0); @@ -3061,7 +3043,7 @@ pub fn constructor_imm(ctx: &mut C, arg0: Type, arg1: u64) -> Option }; if let Some(pattern3_0) = closure3() { if let Some(pattern4_0) = C::imm_logic_from_u64(ctx, pattern2_0, pattern3_0) { - // Rule at src/isa/aarch64/inst.isle line 1876. + // Rule at src/isa/aarch64/inst.isle line 1875. let expr0_0: Type = I64; let expr1_0 = C::zero_reg(ctx); let expr2_0 = constructor_orr_imm(ctx, expr0_0, expr1_0, pattern4_0)?; @@ -3069,18 +3051,18 @@ pub fn constructor_imm(ctx: &mut C, arg0: Type, arg1: u64) -> Option } } if let Some(pattern3_0) = C::move_wide_const_from_u64(ctx, pattern2_0) { - // Rule at src/isa/aarch64/inst.isle line 1868. + // Rule at src/isa/aarch64/inst.isle line 1867. let expr0_0 = OperandSize::Size64; let expr1_0 = constructor_movz(ctx, pattern3_0, &expr0_0)?; return Some(expr1_0); } if let Some(pattern3_0) = C::move_wide_const_from_negated_u64(ctx, pattern2_0) { - // Rule at src/isa/aarch64/inst.isle line 1872. + // Rule at src/isa/aarch64/inst.isle line 1871. let expr0_0 = OperandSize::Size64; let expr1_0 = constructor_movn(ctx, pattern3_0, &expr0_0)?; return Some(expr1_0); } - // Rule at src/isa/aarch64/inst.isle line 1883. + // Rule at src/isa/aarch64/inst.isle line 1882. let expr0_0 = C::load_constant64_full(ctx, pattern2_0); return Some(expr0_0); } @@ -3092,17 +3074,17 @@ pub fn constructor_put_in_reg_sext32(ctx: &mut C, arg0: Value) -> Op let pattern0_0 = arg0; let pattern1_0 = C::value_type(ctx, pattern0_0); if pattern1_0 == I32 { - // Rule at src/isa/aarch64/inst.isle line 1894. + // Rule at src/isa/aarch64/inst.isle line 1893. 
let expr0_0 = C::put_in_reg(ctx, pattern0_0); return Some(expr0_0); } if pattern1_0 == I64 { - // Rule at src/isa/aarch64/inst.isle line 1895. + // Rule at src/isa/aarch64/inst.isle line 1894. let expr0_0 = C::put_in_reg(ctx, pattern0_0); return Some(expr0_0); } if let Some(pattern2_0) = C::fits_in_32(ctx, pattern1_0) { - // Rule at src/isa/aarch64/inst.isle line 1890. + // Rule at src/isa/aarch64/inst.isle line 1889. let expr0_0 = C::put_in_reg(ctx, pattern0_0); let expr1_0: bool = true; let expr2_0 = C::ty_bits(ctx, pattern2_0); @@ -3118,17 +3100,17 @@ pub fn constructor_put_in_reg_zext32(ctx: &mut C, arg0: Value) -> Op let pattern0_0 = arg0; let pattern1_0 = C::value_type(ctx, pattern0_0); if pattern1_0 == I32 { - // Rule at src/isa/aarch64/inst.isle line 1903. + // Rule at src/isa/aarch64/inst.isle line 1902. let expr0_0 = C::put_in_reg(ctx, pattern0_0); return Some(expr0_0); } if pattern1_0 == I64 { - // Rule at src/isa/aarch64/inst.isle line 1904. + // Rule at src/isa/aarch64/inst.isle line 1903. let expr0_0 = C::put_in_reg(ctx, pattern0_0); return Some(expr0_0); } if let Some(pattern2_0) = C::fits_in_32(ctx, pattern1_0) { - // Rule at src/isa/aarch64/inst.isle line 1899. + // Rule at src/isa/aarch64/inst.isle line 1898. let expr0_0 = C::put_in_reg(ctx, pattern0_0); let expr1_0: bool = false; let expr2_0 = C::ty_bits(ctx, pattern2_0); @@ -3144,12 +3126,12 @@ pub fn constructor_put_in_reg_sext64(ctx: &mut C, arg0: Value) -> Op let pattern0_0 = arg0; let pattern1_0 = C::value_type(ctx, pattern0_0); if pattern1_0 == I64 { - // Rule at src/isa/aarch64/inst.isle line 1912. + // Rule at src/isa/aarch64/inst.isle line 1911. let expr0_0 = C::put_in_reg(ctx, pattern0_0); return Some(expr0_0); } if let Some(pattern2_0) = C::fits_in_32(ctx, pattern1_0) { - // Rule at src/isa/aarch64/inst.isle line 1908. + // Rule at src/isa/aarch64/inst.isle line 1907. let expr0_0 = C::put_in_reg(ctx, pattern0_0); let expr1_0: bool = true; let expr2_0 = C::ty_bits(ctx, pattern2_0); @@ -3165,12 +3147,12 @@ pub fn constructor_put_in_reg_zext64(ctx: &mut C, arg0: Value) -> Op let pattern0_0 = arg0; let pattern1_0 = C::value_type(ctx, pattern0_0); if pattern1_0 == I64 { - // Rule at src/isa/aarch64/inst.isle line 1920. + // Rule at src/isa/aarch64/inst.isle line 1919. let expr0_0 = C::put_in_reg(ctx, pattern0_0); return Some(expr0_0); } if let Some(pattern2_0) = C::fits_in_32(ctx, pattern1_0) { - // Rule at src/isa/aarch64/inst.isle line 1916. + // Rule at src/isa/aarch64/inst.isle line 1915. let expr0_0 = C::put_in_reg(ctx, pattern0_0); let expr1_0: bool = false; let expr2_0 = C::ty_bits(ctx, pattern2_0); @@ -3184,7 +3166,7 @@ pub fn constructor_put_in_reg_zext64(ctx: &mut C, arg0: Value) -> Op // Generated as internal constructor for term trap_if_zero_divisor. pub fn constructor_trap_if_zero_divisor(ctx: &mut C, arg0: Reg) -> Option { let pattern0_0 = arg0; - // Rule at src/isa/aarch64/inst.isle line 1925. + // Rule at src/isa/aarch64/inst.isle line 1924. let expr0_0 = C::cond_br_zero(ctx, pattern0_0); let expr1_0 = C::trap_code_division_by_zero(ctx); let expr2_0 = MInst::TrapIf { @@ -3199,12 +3181,12 @@ pub fn constructor_trap_if_zero_divisor(ctx: &mut C, arg0: Reg) -> O pub fn constructor_size_from_ty(ctx: &mut C, arg0: Type) -> Option { let pattern0_0 = arg0; if pattern0_0 == I64 { - // Rule at src/isa/aarch64/inst.isle line 1931. + // Rule at src/isa/aarch64/inst.isle line 1930. 
let expr0_0 = OperandSize::Size64; return Some(expr0_0); } if let Some(pattern1_0) = C::fits_in_32(ctx, pattern0_0) { - // Rule at src/isa/aarch64/inst.isle line 1930. + // Rule at src/isa/aarch64/inst.isle line 1929. let expr0_0 = OperandSize::Size32; return Some(expr0_0); } @@ -3221,7 +3203,7 @@ pub fn constructor_trap_if_div_overflow( let pattern0_0 = arg0; let pattern1_0 = arg1; let pattern2_0 = arg2; - // Rule at src/isa/aarch64/inst.isle line 1937. + // Rule at src/isa/aarch64/inst.isle line 1936. let expr0_0 = ALUOp::AddS; let expr1_0 = constructor_operand_size(ctx, pattern0_0)?; let expr2_0 = C::writable_zero_reg(ctx); @@ -3290,7 +3272,7 @@ pub fn constructor_alu_rs_imm_logic_commutative( C::imm_logic_from_imm64(ctx, pattern5_1, pattern7_0) { let pattern9_0 = arg3; - // Rule at src/isa/aarch64/inst.isle line 1982. + // Rule at src/isa/aarch64/inst.isle line 1981. let expr0_0 = C::put_in_reg(ctx, pattern9_0); let expr1_0 = constructor_alu_rr_imm_logic( ctx, pattern0_0, pattern1_0, expr0_0, pattern8_0, @@ -3322,7 +3304,7 @@ pub fn constructor_alu_rs_imm_logic_commutative( C::lshl_from_imm64(ctx, pattern10_1, pattern12_0) { let pattern14_0 = arg3; - // Rule at src/isa/aarch64/inst.isle line 1988. + // Rule at src/isa/aarch64/inst.isle line 1987. let expr0_0 = C::put_in_reg(ctx, pattern14_0); let expr1_0 = C::put_in_reg(ctx, pattern7_0); let expr2_0 = constructor_alu_rrr_shift( @@ -3360,7 +3342,7 @@ pub fn constructor_alu_rs_imm_logic_commutative( if let Some(pattern9_0) = C::imm_logic_from_imm64(ctx, pattern6_1, pattern8_0) { - // Rule at src/isa/aarch64/inst.isle line 1980. + // Rule at src/isa/aarch64/inst.isle line 1979. let expr0_0 = C::put_in_reg(ctx, pattern2_0); let expr1_0 = constructor_alu_rr_imm_logic( ctx, pattern0_0, pattern1_0, expr0_0, pattern9_0, @@ -3391,7 +3373,7 @@ pub fn constructor_alu_rs_imm_logic_commutative( if let Some(pattern14_0) = C::lshl_from_imm64(ctx, pattern11_1, pattern13_0) { - // Rule at src/isa/aarch64/inst.isle line 1986. + // Rule at src/isa/aarch64/inst.isle line 1985. let expr0_0 = C::put_in_reg(ctx, pattern2_0); let expr1_0 = C::put_in_reg(ctx, pattern8_0); let expr2_0 = constructor_alu_rrr_shift( @@ -3413,7 +3395,7 @@ pub fn constructor_alu_rs_imm_logic_commutative( _ => {} } } - // Rule at src/isa/aarch64/inst.isle line 1976. + // Rule at src/isa/aarch64/inst.isle line 1975. let expr0_0 = C::put_in_reg(ctx, pattern2_0); let expr1_0 = C::put_in_reg(ctx, pattern3_0); let expr2_0 = constructor_alu_rrr(ctx, pattern0_0, pattern1_0, expr0_0, expr1_0)?; @@ -3447,7 +3429,7 @@ pub fn constructor_alu_rs_imm_logic( if let Some(pattern9_0) = C::imm_logic_from_imm64(ctx, pattern6_1, pattern8_0) { - // Rule at src/isa/aarch64/inst.isle line 1996. + // Rule at src/isa/aarch64/inst.isle line 1995. let expr0_0 = C::put_in_reg(ctx, pattern2_0); let expr1_0 = constructor_alu_rr_imm_logic( ctx, pattern0_0, pattern1_0, expr0_0, pattern9_0, @@ -3478,7 +3460,7 @@ pub fn constructor_alu_rs_imm_logic( if let Some(pattern14_0) = C::lshl_from_imm64(ctx, pattern11_1, pattern13_0) { - // Rule at src/isa/aarch64/inst.isle line 1998. + // Rule at src/isa/aarch64/inst.isle line 1997. let expr0_0 = C::put_in_reg(ctx, pattern2_0); let expr1_0 = C::put_in_reg(ctx, pattern8_0); let expr2_0 = constructor_alu_rrr_shift( @@ -3500,7 +3482,7 @@ pub fn constructor_alu_rs_imm_logic( _ => {} } } - // Rule at src/isa/aarch64/inst.isle line 1994. + // Rule at src/isa/aarch64/inst.isle line 1993. 
let expr0_0 = C::put_in_reg(ctx, pattern2_0); let expr1_0 = C::put_in_reg(ctx, pattern3_0); let expr2_0 = constructor_alu_rrr(ctx, pattern0_0, pattern1_0, expr0_0, expr1_0)?; @@ -3519,7 +3501,7 @@ pub fn constructor_i128_alu_bitop( let pattern1_0 = arg1; let pattern2_0 = arg2; let pattern3_0 = arg3; - // Rule at src/isa/aarch64/inst.isle line 2006. + // Rule at src/isa/aarch64/inst.isle line 2005. let expr0_0 = C::put_in_regs(ctx, pattern2_0); let expr1_0: usize = 0; let expr2_0 = C::value_regs_get(ctx, expr0_0, expr1_0); @@ -3546,7 +3528,7 @@ pub fn constructor_float_cmp_zero( let pattern0_0 = arg0; let pattern1_0 = arg1; let pattern2_0 = arg2; - // Rule at src/isa/aarch64/inst.isle line 2046. + // Rule at src/isa/aarch64/inst.isle line 2045. let expr0_0 = C::float_cc_cmp_zero_to_vec_misc_op(ctx, pattern0_0); let expr1_0 = constructor_vec_misc(ctx, &expr0_0, pattern1_0, pattern2_0)?; return Some(expr1_0); @@ -3562,7 +3544,7 @@ pub fn constructor_float_cmp_zero_swap( let pattern0_0 = arg0; let pattern1_0 = arg1; let pattern2_0 = arg2; - // Rule at src/isa/aarch64/inst.isle line 2051. + // Rule at src/isa/aarch64/inst.isle line 2050. let expr0_0 = C::float_cc_cmp_zero_to_vec_misc_op_swap(ctx, pattern0_0); let expr1_0 = constructor_vec_misc(ctx, &expr0_0, pattern1_0, pattern2_0)?; return Some(expr1_0); @@ -3572,7 +3554,7 @@ pub fn constructor_float_cmp_zero_swap( pub fn constructor_fcmeq0(ctx: &mut C, arg0: Reg, arg1: &VectorSize) -> Option { let pattern0_0 = arg0; let pattern1_0 = arg1; - // Rule at src/isa/aarch64/inst.isle line 2056. + // Rule at src/isa/aarch64/inst.isle line 2055. let expr0_0 = VecMisc2::Fcmeq0; let expr1_0 = constructor_vec_misc(ctx, &expr0_0, pattern0_0, pattern1_0)?; return Some(expr1_0); @@ -3588,7 +3570,7 @@ pub fn constructor_int_cmp_zero( let pattern0_0 = arg0; let pattern1_0 = arg1; let pattern2_0 = arg2; - // Rule at src/isa/aarch64/inst.isle line 2082. + // Rule at src/isa/aarch64/inst.isle line 2081. let expr0_0 = C::int_cc_cmp_zero_to_vec_misc_op(ctx, pattern0_0); let expr1_0 = constructor_vec_misc(ctx, &expr0_0, pattern1_0, pattern2_0)?; return Some(expr1_0); @@ -3604,7 +3586,7 @@ pub fn constructor_int_cmp_zero_swap( let pattern0_0 = arg0; let pattern1_0 = arg1; let pattern2_0 = arg2; - // Rule at src/isa/aarch64/inst.isle line 2087. + // Rule at src/isa/aarch64/inst.isle line 2086. let expr0_0 = C::int_cc_cmp_zero_to_vec_misc_op_swap(ctx, pattern0_0); let expr1_0 = constructor_vec_misc(ctx, &expr0_0, pattern1_0, pattern2_0)?; return Some(expr1_0); @@ -3614,7 +3596,7 @@ pub fn constructor_int_cmp_zero_swap( pub fn constructor_cmeq0(ctx: &mut C, arg0: Reg, arg1: &VectorSize) -> Option { let pattern0_0 = arg0; let pattern1_0 = arg1; - // Rule at src/isa/aarch64/inst.isle line 2092. + // Rule at src/isa/aarch64/inst.isle line 2091. let expr0_0 = VecMisc2::Cmeq0; let expr1_0 = constructor_vec_misc(ctx, &expr0_0, pattern0_0, pattern1_0)?; return Some(expr1_0); diff --git a/cranelift/codegen/src/isa/aarch64/lower_inst.rs b/cranelift/codegen/src/isa/aarch64/lower_inst.rs index 10fe714730..577b004d6a 100644 --- a/cranelift/codegen/src/isa/aarch64/lower_inst.rs +++ b/cranelift/codegen/src/isa/aarch64/lower_inst.rs @@ -1,27 +1,22 @@ //! Lower a single Cranelift instruction into vcode. 
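Note on the generated `prelude.isle` / `inst.isle` hunks above: none of the constructor logic changes; every hunk only shifts a `// Rule at ...` source-location comment (by nine lines for `src/prelude.isle`, by one line for `src/isa/aarch64/inst.isle`), reflecting rules deleted earlier in those source files. For orientation, the sketch below shows the shape all of these generated constructors share. It is illustrative only: `constructor_alu_rrr` and its call shape are taken from the hunks above, while the `Context` bound, the rule, and the line number are assumptions (this diff's extraction stripped most generic parameters).

// Hypothetical sketch of an ISLE-generated constructor; not part of the diff.
// Each rule body builds a value through helper constructors and chains with
// `?`, so a helper returning `None` falls through to the next matching rule.
pub fn constructor_example_add<C: Context>(ctx: &mut C, ty: Type, rn: Reg, rm: Reg) -> Option<Reg> {
    // Rule at src/isa/aarch64/inst.isle line NNN (hypothetical).
    let op = ALUOp::Add;
    let dst = constructor_alu_rrr(ctx, &op, ty, rn, rm)?;
    Some(dst)
}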
+use super::lower::*; use crate::binemit::CodeOffset; use crate::ir::condcodes::FloatCC; use crate::ir::types::*; use crate::ir::Inst as IRInst; use crate::ir::{InstructionData, Opcode, TrapCode}; +use crate::isa::aarch64::abi::*; +use crate::isa::aarch64::inst::*; use crate::isa::aarch64::settings as aarch64_settings; use crate::machinst::lower::*; use crate::machinst::*; use crate::settings::{Flags, TlsModel}; use crate::{CodegenError, CodegenResult}; - -use crate::isa::aarch64::abi::*; -use crate::isa::aarch64::inst::*; - -use regalloc::Writable; - use alloc::boxed::Box; use alloc::vec::Vec; use core::convert::TryFrom; -use super::lower::*; - /// Actually codegen an instruction's results into registers. pub(crate) fn lower_insn_to_regs>( ctx: &mut C, @@ -766,7 +761,7 @@ pub(crate) fn lower_insn_to_regs>( Opcode::Trap | Opcode::ResumableTrap => { let trap_code = ctx.data(insn).trap_code().unwrap(); - ctx.emit_safepoint(Inst::Udf { trap_code }); + ctx.emit(Inst::Udf { trap_code }); } Opcode::Trapif | Opcode::Trapff => { @@ -797,7 +792,7 @@ pub(crate) fn lower_insn_to_regs>( cond }; - ctx.emit_safepoint(Inst::TrapIf { + ctx.emit(Inst::TrapIf { trap_code, kind: CondBrKind::Cond(cond), }); @@ -1507,35 +1502,34 @@ pub(crate) fn lower_insn_to_regs>( let lane_type = ty.lane_type(); let rd = get_output_reg(ctx, outputs[0]).only_reg().unwrap(); - let mut match_long_pair = - |ext_low_op, ext_high_op| -> Option<(VecRRPairLongOp, regalloc::Reg)> { - if let Some(lhs) = maybe_input_insn(ctx, inputs[0], ext_low_op) { - if let Some(rhs) = maybe_input_insn(ctx, inputs[1], ext_high_op) { - let lhs_inputs = insn_inputs(ctx, lhs); - let rhs_inputs = insn_inputs(ctx, rhs); - let low = put_input_in_reg(ctx, lhs_inputs[0], NarrowValueMode::None); - let high = put_input_in_reg(ctx, rhs_inputs[0], NarrowValueMode::None); - if low == high { - match (lane_type, ext_low_op) { - (I16, Opcode::SwidenLow) => { - return Some((VecRRPairLongOp::Saddlp8, low)) - } - (I32, Opcode::SwidenLow) => { - return Some((VecRRPairLongOp::Saddlp16, low)) - } - (I16, Opcode::UwidenLow) => { - return Some((VecRRPairLongOp::Uaddlp8, low)) - } - (I32, Opcode::UwidenLow) => { - return Some((VecRRPairLongOp::Uaddlp16, low)) - } - _ => (), - }; - } + let mut match_long_pair = |ext_low_op, ext_high_op| -> Option<(VecRRPairLongOp, Reg)> { + if let Some(lhs) = maybe_input_insn(ctx, inputs[0], ext_low_op) { + if let Some(rhs) = maybe_input_insn(ctx, inputs[1], ext_high_op) { + let lhs_inputs = insn_inputs(ctx, lhs); + let rhs_inputs = insn_inputs(ctx, rhs); + let low = put_input_in_reg(ctx, lhs_inputs[0], NarrowValueMode::None); + let high = put_input_in_reg(ctx, rhs_inputs[0], NarrowValueMode::None); + if low == high { + match (lane_type, ext_low_op) { + (I16, Opcode::SwidenLow) => { + return Some((VecRRPairLongOp::Saddlp8, low)) + } + (I32, Opcode::SwidenLow) => { + return Some((VecRRPairLongOp::Saddlp16, low)) + } + (I16, Opcode::UwidenLow) => { + return Some((VecRRPairLongOp::Uaddlp8, low)) + } + (I32, Opcode::UwidenLow) => { + return Some((VecRRPairLongOp::Uaddlp16, low)) + } + _ => (), + }; } } - None - }; + } + None + }; if let Some((op, rn)) = match_long_pair(Opcode::SwidenLow, Opcode::SwidenHigh) { ctx.emit(Inst::VecRRPairLong { op, rd, rn }); diff --git a/cranelift/codegen/src/isa/aarch64/mod.rs b/cranelift/codegen/src/isa/aarch64/mod.rs index 2a1a4a42f3..ec1ab0b35e 100644 --- a/cranelift/codegen/src/isa/aarch64/mod.rs +++ b/cranelift/codegen/src/isa/aarch64/mod.rs @@ -11,7 +11,7 @@ use crate::result::CodegenResult; use 
crate::settings as shared_settings; use alloc::{boxed::Box, vec::Vec}; use core::fmt; -use regalloc::{PrettyPrint, RealRegUniverse}; +use regalloc2::MachineEnv; use target_lexicon::{Aarch64Architecture, Architecture, Triple}; // New backend: @@ -21,7 +21,7 @@ mod lower; mod lower_inst; mod settings; -use inst::create_reg_universe; +use inst::create_reg_env; use self::inst::EmitInfo; @@ -30,7 +30,7 @@ pub struct AArch64Backend { triple: Triple, flags: shared_settings::Flags, isa_flags: aarch64_settings::Flags, - reg_universe: RealRegUniverse, + machine_env: MachineEnv, } impl AArch64Backend { @@ -40,12 +40,12 @@ impl AArch64Backend { flags: shared_settings::Flags, isa_flags: aarch64_settings::Flags, ) -> AArch64Backend { - let reg_universe = create_reg_universe(&flags); + let machine_env = create_reg_env(&flags); AArch64Backend { triple, flags, isa_flags, - reg_universe, + machine_env, } } @@ -55,10 +55,10 @@ impl AArch64Backend { &self, func: &Function, flags: shared_settings::Flags, - ) -> CodegenResult> { + ) -> CodegenResult<(VCode, regalloc2::Output)> { let emit_info = EmitInfo::new(flags.clone()); let abi = Box::new(abi::AArch64ABICallee::new(func, flags, self.isa_flags())?); - compile::compile::(func, self, abi, &self.reg_universe, emit_info) + compile::compile::(func, self, abi, &self.machine_env, emit_info) } } @@ -69,28 +69,27 @@ impl TargetIsa for AArch64Backend { want_disasm: bool, ) -> CodegenResult { let flags = self.flags(); - let vcode = self.compile_vcode(func, flags.clone())?; + let (vcode, regalloc_result) = self.compile_vcode(func, flags.clone())?; - let (buffer, bb_starts, bb_edges) = vcode.emit(); - let frame_size = vcode.frame_size(); - let stackslot_offsets = vcode.stackslot_offsets().clone(); + let want_disasm = want_disasm || log::log_enabled!(log::Level::Debug); + let emit_result = vcode.emit(®alloc_result, want_disasm, flags.machine_code_cfg_info()); + let frame_size = emit_result.frame_size; + let value_labels_ranges = emit_result.value_labels_ranges; + let buffer = emit_result.buffer.finish(); + let stackslot_offsets = emit_result.stackslot_offsets; - let disasm = if want_disasm { - Some(vcode.show_rru(Some(&create_reg_universe(flags)))) - } else { - None - }; - - let buffer = buffer.finish(); + if let Some(disasm) = emit_result.disasm.as_ref() { + log::debug!("disassembly:\n{}", disasm); + } Ok(MachCompileResult { buffer, frame_size, - disasm, - value_labels_ranges: Default::default(), + disasm: emit_result.disasm, + value_labels_ranges, stackslot_offsets, - bb_starts, - bb_edges, + bb_starts: emit_result.bb_offsets, + bb_edges: emit_result.bb_edges, }) } @@ -218,11 +217,11 @@ mod test { let buffer = backend.compile_function(&mut func, false).unwrap().buffer; let code = buffer.data(); - // mov x1, #0x1234 - // add w0, w0, w1 + // mov x3, #0x1234 + // add w0, w0, w3 // ret let golden = vec![ - 0x81, 0x46, 0x82, 0xd2, 0x00, 0x00, 0x01, 0x0b, 0xc0, 0x03, 0x5f, 0xd6, + 0x83, 0x46, 0x82, 0xd2, 0x00, 0x00, 0x03, 0x0b, 0xc0, 0x03, 0x5f, 0xd6, ]; assert_eq!(code, &golden[..]); @@ -273,23 +272,24 @@ mod test { .unwrap(); let code = result.buffer.data(); - // mov x1, #0x1234 // #4660 - // add w0, w0, w1 - // mov w1, w0 - // cbnz x1, 0x28 - // mov x1, #0x1234 // #4660 - // add w1, w0, w1 - // mov w1, w1 - // cbnz x1, 0x18 - // mov w1, w0 - // cbnz x1, 0x18 - // mov x1, #0x1234 // #4660 - // sub w0, w0, w1 + // mov x10, #0x1234 // #4660 + // add w12, w0, w10 + // mov w11, w12 + // cbnz x11, 0x20 + // mov x13, #0x1234 // #4660 + // add w15, w12, w13 + // mov w14, w15 + // 
cbnz x14, 0x10 + // mov w1, w12 + // cbnz x1, 0x10 + // mov x2, #0x1234 // #4660 + // sub w0, w12, w2 // ret + let golden = vec![ - 129, 70, 130, 210, 0, 0, 1, 11, 225, 3, 0, 42, 161, 0, 0, 181, 129, 70, 130, 210, 1, 0, - 1, 11, 225, 3, 1, 42, 161, 255, 255, 181, 225, 3, 0, 42, 97, 255, 255, 181, 129, 70, - 130, 210, 0, 0, 1, 75, 192, 3, 95, 214, + 138, 70, 130, 210, 12, 0, 10, 11, 235, 3, 12, 42, 171, 0, 0, 181, 141, 70, 130, 210, + 143, 1, 13, 11, 238, 3, 15, 42, 174, 255, 255, 181, 225, 3, 12, 42, 97, 255, 255, 181, + 130, 70, 130, 210, 128, 1, 2, 75, 192, 3, 95, 214, ]; assert_eq!(code, &golden[..]); diff --git a/cranelift/codegen/src/isa/mod.rs b/cranelift/codegen/src/isa/mod.rs index 821b9627ea..691c90e32b 100644 --- a/cranelift/codegen/src/isa/mod.rs +++ b/cranelift/codegen/src/isa/mod.rs @@ -235,7 +235,10 @@ pub trait TargetIsa: fmt::Display + Send + Sync { #[cfg(feature = "unwind")] /// Map a regalloc::Reg to its corresponding DWARF register. - fn map_regalloc_reg_to_dwarf(&self, _: ::regalloc::Reg) -> Result { + fn map_regalloc_reg_to_dwarf( + &self, + _: crate::machinst::Reg, + ) -> Result { Err(RegisterMappingError::UnsupportedArchitecture) } diff --git a/cranelift/codegen/src/isa/s390x/abi.rs b/cranelift/codegen/src/isa/s390x/abi.rs index 1ab9a1f001..8038eb581c 100644 --- a/cranelift/codegen/src/isa/s390x/abi.rs +++ b/cranelift/codegen/src/isa/s390x/abi.rs @@ -66,11 +66,13 @@ use crate::isa; use crate::isa::s390x::inst::*; use crate::isa::unwind::UnwindInst; use crate::machinst::*; +use crate::machinst::{RealReg, Reg, RegClass, Writable}; use crate::settings; use crate::{CodegenError, CodegenResult}; use alloc::boxed::Box; use alloc::vec::Vec; -use regalloc::{RealReg, Reg, RegClass, Set, Writable}; +use regalloc2::PReg; +use regalloc2::VReg; use smallvec::{smallvec, SmallVec}; use std::convert::TryFrom; @@ -235,7 +237,7 @@ impl ABIMachineSpec for S390xMachineDeps { if let Some(reg) = candidate { ret.push(ABIArg::reg( - reg.to_real_reg(), + reg.to_real_reg().unwrap(), param.value_type, param.extension, param.purpose, @@ -279,7 +281,7 @@ impl ABIMachineSpec for S390xMachineDeps { debug_assert!(args_or_rets == ArgsOrRets::Args); if let Some(reg) = get_intreg_for_arg(next_gpr) { ret.push(ABIArg::reg( - reg.to_real_reg(), + reg.to_real_reg().unwrap(), types::I64, ir::ArgumentExtension::None, ir::ArgumentPurpose::Normal, @@ -340,8 +342,11 @@ impl ABIMachineSpec for S390xMachineDeps { } } - fn gen_ret() -> Inst { - Inst::Ret { link: gpr(14) } + fn gen_ret(rets: Vec) -> Inst { + Inst::Ret { + link: gpr(14), + rets, + } } fn gen_add_imm(into_reg: Writable, from_reg: Reg, imm: u32) -> SmallInstVec { @@ -462,7 +467,7 @@ impl ABIMachineSpec for S390xMachineDeps { _call_conv: isa::CallConv, _setup_frame: bool, flags: &settings::Flags, - clobbered_callee_saves: &Vec>, + clobbered_callee_saves: &[Writable], fixed_frame_storage_size: u32, outgoing_args_size: u32, ) -> (u64, SmallVec<[Inst; 16]>) { @@ -471,16 +476,15 @@ impl ABIMachineSpec for S390xMachineDeps { let mut clobbered_gpr = vec![]; for ® in clobbered_callee_saves.iter() { - match reg.to_reg().get_class() { - RegClass::I64 => clobbered_gpr.push(reg), - RegClass::F64 => clobbered_fpr.push(reg), - class => panic!("Unexpected RegClass: {:?}", class), + match reg.to_reg().class() { + RegClass::Int => clobbered_gpr.push(reg), + RegClass::Float => clobbered_fpr.push(reg), } } let mut first_clobbered_gpr = 16; for reg in clobbered_gpr { - let enc = reg.to_reg().get_hw_encoding(); + let enc = reg.to_reg().hw_enc(); if enc < 
first_clobbered_gpr { first_clobbered_gpr = enc; } @@ -499,7 +503,7 @@ impl ABIMachineSpec for S390xMachineDeps { if first_clobbered_gpr < 16 { let offset = 8 * first_clobbered_gpr as i64; insts.push(Inst::StoreMultiple64 { - rt: gpr(first_clobbered_gpr as u8), + rt: gpr(first_clobbered_gpr), rt2: gpr(15), mem: MemArg::reg_plus_off(stack_reg(), offset, MemFlags::trusted()), }); @@ -509,7 +513,7 @@ impl ABIMachineSpec for S390xMachineDeps { insts.push(Inst::Unwind { inst: UnwindInst::SaveReg { clobber_offset: clobber_size as u32 + (i * 8) as u32, - reg: gpr(i as u8).to_real_reg(), + reg: gpr(i).to_real_reg().unwrap(), }, }); } @@ -535,7 +539,7 @@ impl ABIMachineSpec for S390xMachineDeps { // Save FPRs. for (i, reg) in clobbered_fpr.iter().enumerate() { insts.push(Inst::FpuStore64 { - rd: reg.to_reg().to_reg(), + rd: reg.to_reg().into(), mem: MemArg::reg_plus_off( stack_reg(), (i * 8) as i64 + outgoing_args_size as i64 + fixed_frame_storage_size as i64, @@ -558,7 +562,7 @@ impl ABIMachineSpec for S390xMachineDeps { fn gen_clobber_restore( call_conv: isa::CallConv, _: &settings::Flags, - clobbers: &Set>, + clobbers: &[Writable], fixed_frame_storage_size: u32, outgoing_args_size: u32, ) -> SmallVec<[Inst; 16]> { @@ -568,7 +572,7 @@ impl ABIMachineSpec for S390xMachineDeps { let (clobbered_gpr, clobbered_fpr) = get_regs_saved_in_prologue(call_conv, clobbers); let mut first_clobbered_gpr = 16; for reg in clobbered_gpr { - let enc = reg.to_reg().get_hw_encoding(); + let enc = reg.to_reg().hw_enc(); if enc < first_clobbered_gpr { first_clobbered_gpr = enc; } @@ -578,7 +582,7 @@ impl ABIMachineSpec for S390xMachineDeps { // Restore FPRs. for (i, reg) in clobbered_fpr.iter().enumerate() { insts.push(Inst::FpuLoad64 { - rd: Writable::from_reg(reg.to_reg().to_reg()), + rd: Writable::from_reg(reg.to_reg().into()), mem: MemArg::reg_plus_off( stack_reg(), (i * 8) as i64 + outgoing_args_size as i64 + fixed_frame_storage_size as i64, @@ -603,7 +607,7 @@ impl ABIMachineSpec for S390xMachineDeps { offset += stack_size as i64; } insts.push(Inst::LoadMultiple64 { - rt: writable_gpr(first_clobbered_gpr as u8), + rt: writable_gpr(first_clobbered_gpr), rt2: writable_gpr(15), mem: MemArg::reg_plus_off(stack_reg(), offset, MemFlags::trusted()), }); @@ -620,55 +624,43 @@ impl ABIMachineSpec for S390xMachineDeps { tmp: Writable, _callee_conv: isa::CallConv, _caller_conv: isa::CallConv, - ) -> SmallVec<[(InstIsSafepoint, Inst); 2]> { + ) -> SmallVec<[Inst; 2]> { let mut insts = SmallVec::new(); match &dest { - &CallDest::ExtName(ref name, RelocDistance::Near) => insts.push(( - InstIsSafepoint::Yes, - Inst::Call { - link: writable_gpr(14), - info: Box::new(CallInfo { - dest: name.clone(), - uses, - defs, - opcode, - }), - }, - )), + &CallDest::ExtName(ref name, RelocDistance::Near) => insts.push(Inst::Call { + link: writable_gpr(14), + info: Box::new(CallInfo { + dest: name.clone(), + uses, + defs, + opcode, + }), + }), &CallDest::ExtName(ref name, RelocDistance::Far) => { - insts.push(( - InstIsSafepoint::No, - Inst::LoadExtNameFar { - rd: tmp, - name: Box::new(name.clone()), - offset: 0, - }, - )); - insts.push(( - InstIsSafepoint::Yes, - Inst::CallInd { - link: writable_gpr(14), - info: Box::new(CallIndInfo { - rn: tmp.to_reg(), - uses, - defs, - opcode, - }), - }, - )); - } - &CallDest::Reg(reg) => insts.push(( - InstIsSafepoint::Yes, - Inst::CallInd { + insts.push(Inst::LoadExtNameFar { + rd: tmp, + name: Box::new(name.clone()), + offset: 0, + }); + insts.push(Inst::CallInd { link: writable_gpr(14), info: 
Box::new(CallIndInfo { - rn: *reg, + rn: tmp.to_reg(), uses, defs, opcode, }), - }, - )), + }); + } + &CallDest::Reg(reg) => insts.push(Inst::CallInd { + link: writable_gpr(14), + info: Box::new(CallIndInfo { + rn: *reg, + uses, + defs, + opcode, + }), + }), } insts @@ -686,9 +678,8 @@ impl ABIMachineSpec for S390xMachineDeps { fn get_number_of_spillslots_for_value(rc: RegClass) -> u32 { // We allocate in terms of 8-byte slots. match rc { - RegClass::I64 => 1, - RegClass::F64 => 1, - _ => panic!("Unexpected register class!"), + RegClass::Int => 1, + RegClass::Float => 1, } } @@ -706,13 +697,13 @@ impl ABIMachineSpec for S390xMachineDeps { let mut caller_saved = Vec::new(); for i in 0..15 { let x = writable_gpr(i); - if is_reg_clobbered_by_call(call_conv_of_callee, x.to_reg().to_real_reg()) { + if is_reg_clobbered_by_call(call_conv_of_callee, x.to_reg().to_real_reg().unwrap()) { caller_saved.push(x); } } for i in 0..15 { let v = writable_fpr(i); - if is_reg_clobbered_by_call(call_conv_of_callee, v.to_reg().to_real_reg()) { + if is_reg_clobbered_by_call(call_conv_of_callee, v.to_reg().to_real_reg().unwrap()) { caller_saved.push(v); } } @@ -728,7 +719,7 @@ impl ABIMachineSpec for S390xMachineDeps { fn get_clobbered_callee_saves( call_conv: isa::CallConv, - regs: &Set>, + regs: &[Writable], ) -> Vec> { let mut regs: Vec> = regs .iter() @@ -738,7 +729,7 @@ impl ABIMachineSpec for S390xMachineDeps { // Sort registers for deterministic code output. We can do an unstable // sort because the registers will be unique (there are no dups). - regs.sort_unstable_by_key(|r| r.to_reg().get_index()); + regs.sort_unstable_by_key(|r| PReg::from(r.to_reg()).index()); regs } @@ -754,50 +745,47 @@ impl ABIMachineSpec for S390xMachineDeps { } fn is_reg_saved_in_prologue(_call_conv: isa::CallConv, r: RealReg) -> bool { - match r.get_class() { - RegClass::I64 => { + match r.class() { + RegClass::Int => { // r6 - r15 inclusive are callee-saves. - r.get_hw_encoding() >= 6 && r.get_hw_encoding() <= 15 + r.hw_enc() >= 6 && r.hw_enc() <= 15 } - RegClass::F64 => { + RegClass::Float => { // f8 - f15 inclusive are callee-saves. - r.get_hw_encoding() >= 8 && r.get_hw_encoding() <= 15 + r.hw_enc() >= 8 && r.hw_enc() <= 15 } - _ => panic!("Unexpected RegClass"), } } fn get_regs_saved_in_prologue( call_conv: isa::CallConv, - regs: &Set>, + regs: &[Writable], ) -> (Vec>, Vec>) { let mut int_saves = vec![]; let mut fpr_saves = vec![]; - for ® in regs.iter() { + for ® in regs { if is_reg_saved_in_prologue(call_conv, reg.to_reg()) { - match reg.to_reg().get_class() { - RegClass::I64 => int_saves.push(reg), - RegClass::F64 => fpr_saves.push(reg), - _ => panic!("Unexpected RegClass"), + match reg.to_reg().class() { + RegClass::Int => int_saves.push(reg), + RegClass::Float => fpr_saves.push(reg), } } } // Sort registers for deterministic code output. - int_saves.sort_by_key(|r| r.to_reg().get_index()); - fpr_saves.sort_by_key(|r| r.to_reg().get_index()); + int_saves.sort_by_key(|r| VReg::from(r.to_reg()).vreg()); + fpr_saves.sort_by_key(|r| VReg::from(r.to_reg()).vreg()); (int_saves, fpr_saves) } fn is_reg_clobbered_by_call(_call_conv: isa::CallConv, r: RealReg) -> bool { - match r.get_class() { - RegClass::I64 => { + match r.class() { + RegClass::Int => { // r0 - r5 inclusive are caller-saves. - r.get_hw_encoding() <= 5 + r.hw_enc() <= 5 } - RegClass::F64 => { + RegClass::Float => { // f0 - f7 inclusive are caller-saves. 
diff --git a/cranelift/codegen/src/isa/s390x/inst.isle b/cranelift/codegen/src/isa/s390x/inst.isle
index 09a41a4fe7..6c127fbc14 100644
--- a/cranelift/codegen/src/isa/s390x/inst.isle
+++ b/cranelift/codegen/src/isa/s390x/inst.isle
@@ -601,7 +601,8 @@
   ;; A machine return instruction.
   (Ret
-    (link Reg))
+    (link Reg)
+    (rets VecReg))

   ;; A placeholder instruction, generating no code, meaning that a function epilogue must be
   ;; inserted there.
@@ -696,11 +697,10 @@
   (VirtualSPOffsetAdj
     (offset i64))

-  ;; A definition of a value label.
-  (ValueLabelMarker
-    (reg Reg)
-    (label ValueLabel))
-
+  ;; Pseudoinstruction to keep a value alive.
+  (DummyUse
+    (reg Reg))
+
   ;; An unwind pseudoinstruction describing the state of the
   ;; machine at this program point.
   (Unwind
diff --git a/cranelift/codegen/src/isa/s390x/inst/args.rs b/cranelift/codegen/src/isa/s390x/inst/args.rs
index 73f96c61a9..1f8e8ca623 100644
--- a/cranelift/codegen/src/isa/s390x/inst/args.rs
+++ b/cranelift/codegen/src/isa/s390x/inst/args.rs
@@ -7,8 +7,7 @@ use crate::ir::condcodes::{FloatCC, IntCC};
 use crate::ir::MemFlags;
 use crate::isa::s390x::inst::*;
 use crate::machinst::MachLabel;
-
-use regalloc::{PrettyPrint, RealRegUniverse, Reg};
+use crate::machinst::{PrettyPrint, Reg};

 use std::string::String;

@@ -113,6 +112,40 @@ impl MemArg {
     pub(crate) fn can_trap(&self) -> bool {
         !self.get_flags().notrap()
     }
+
+    /// Edit registers with allocations.
+    pub fn with_allocs(&self, allocs: &mut AllocationConsumer<'_>) -> Self {
+        match self {
+            &MemArg::BXD12 {
+                base,
+                index,
+                disp,
+                flags,
+            } => MemArg::BXD12 {
+                base: allocs.next(base),
+                index: allocs.next(index),
+                disp,
+                flags,
+            },
+            &MemArg::BXD20 {
+                base,
+                index,
+                disp,
+                flags,
+            } => MemArg::BXD20 {
+                base: allocs.next(base),
+                index: allocs.next(index),
+                disp,
+                flags,
+            },
+            &MemArg::RegOffset { reg, off, flags } => MemArg::RegOffset {
+                reg: allocs.next(reg),
+                off,
+                flags,
+            },
+            x => x.clone(),
+        }
+    }
 }

 //=============================================================================
@@ -183,49 +216,53 @@ impl Cond {
 }

 impl PrettyPrint for MemArg {
-    fn show_rru(&self, mb_rru: Option<&RealRegUniverse>) -> String {
+    fn pretty_print(&self, _: u8, allocs: &mut AllocationConsumer<'_>) -> String {
         match self {
             &MemArg::BXD12 {
                 base, index, disp, ..
             } => {
+                let base = allocs.next(base);
+                let index = allocs.next(index);
                 if base != zero_reg() {
                     if index != zero_reg() {
                         format!(
                             "{}({},{})",
-                            disp.show_rru(mb_rru),
-                            index.show_rru(mb_rru),
-                            base.show_rru(mb_rru)
+                            disp.pretty_print_default(),
+                            show_reg(index),
+                            show_reg(base),
                         )
                     } else {
-                        format!("{}({})", disp.show_rru(mb_rru), base.show_rru(mb_rru))
+                        format!("{}({})", disp.pretty_print_default(), show_reg(base))
                     }
                 } else {
                     if index != zero_reg() {
-                        format!("{}({},)", disp.show_rru(mb_rru), index.show_rru(mb_rru))
+                        format!("{}({},)", disp.pretty_print_default(), show_reg(index))
                     } else {
-                        format!("{}", disp.show_rru(mb_rru))
+                        format!("{}", disp.pretty_print_default())
                     }
                 }
             }
             &MemArg::BXD20 {
                 base, index, disp, ..
             } => {
+                let base = allocs.next(base);
+                let index = allocs.next(index);
                 if base != zero_reg() {
                     if index != zero_reg() {
                         format!(
                             "{}({},{})",
-                            disp.show_rru(mb_rru),
-                            index.show_rru(mb_rru),
-                            base.show_rru(mb_rru)
+                            disp.pretty_print_default(),
+                            show_reg(index),
+                            show_reg(base),
                         )
                     } else {
-                        format!("{}({})", disp.show_rru(mb_rru), base.show_rru(mb_rru))
+                        format!("{}({})", disp.pretty_print_default(), show_reg(base))
                     }
                 } else {
                     if index != zero_reg() {
-                        format!("{}({},)", disp.show_rru(mb_rru), index.show_rru(mb_rru))
+                        format!("{}({},)", disp.pretty_print_default(), show_reg(index))
                     } else {
-                        format!("{}", disp.show_rru(mb_rru))
+                        format!("{}", disp.pretty_print_default())
                    }
                 }
             }
@@ -244,7 +281,7 @@ impl PrettyPrint for MemArg {
 }

 impl PrettyPrint for Cond {
-    fn show_rru(&self, _mb_rru: Option<&RealRegUniverse>) -> String {
+    fn pretty_print(&self, _: u8, _: &mut AllocationConsumer<'_>) -> String {
         let s = match self.mask {
             1 => "o",
             2 => "h",
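The new `with_allocs` helper rewrites a `MemArg`'s registers by pulling the next entries off an allocation cursor. A rough sketch of that cursor pattern, under the assumption that an empty allocation slice means "operands are already physical"; every type below is invented for illustration, not the real cranelift API:

/// Illustrative stand-in for regalloc2's output: one final register per
/// operand, in exactly the order the operands were collected.
struct AllocationConsumer<'a> {
    allocs: &'a [u8], // hardware register numbers, for the sketch
    next: usize,
}

impl<'a> AllocationConsumer<'a> {
    fn new(allocs: &'a [u8]) -> Self {
        Self { allocs, next: 0 }
    }
    /// Map a pre-regalloc operand to its allocation; with an empty slice
    /// (already-allocated instructions), return the operand unchanged.
    fn next_reg(&mut self, vreg: u8) -> u8 {
        if self.allocs.is_empty() {
            return vreg;
        }
        let r = self.allocs[self.next];
        self.next += 1;
        r
    }
}

fn main() {
    // An "instruction" with operands (v0, v1) allocated to (r2, r3).
    let mut allocs = AllocationConsumer::new(&[2, 3]);
    assert_eq!(allocs.next_reg(0), 2);
    assert_eq!(allocs.next_reg(1), 3);
    println!("ok");
}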
diff --git a/cranelift/codegen/src/isa/s390x/inst/emit.rs b/cranelift/codegen/src/isa/s390x/inst/emit.rs
index 46067ed4b9..9a3dc3a76d 100644
--- a/cranelift/codegen/src/isa/s390x/inst/emit.rs
+++ b/cranelift/codegen/src/isa/s390x/inst/emit.rs
@@ -5,8 +5,10 @@ use crate::ir::MemFlags;
 use crate::ir::{SourceLoc, TrapCode};
 use crate::isa::s390x::inst::*;
 use crate::isa::s390x::settings as s390x_settings;
+use crate::machinst::reg::count_operands;
+use crate::machinst::{Reg, RegClass};
 use core::convert::TryFrom;
-use regalloc::{Reg, RegClass};
+use regalloc2::Allocation;

 /// Memory addressing mode finalization: convert "special" modes (e.g.,
 /// generic arbitrary stack offset) into real addressing modes, possibly by
@@ -125,7 +127,7 @@ pub fn mem_emit(
         true,
     );
     for inst in mem_insts.into_iter() {
-        inst.emit(sink, emit_info, state);
+        inst.emit(&[], sink, emit_info, state);
     }

     if add_trap && mem.can_trap() {
@@ -195,7 +197,7 @@ pub fn mem_rs_emit(
         false,
     );
     for inst in mem_insts.into_iter() {
-        inst.emit(sink, emit_info, state);
+        inst.emit(&[], sink, emit_info, state);
     }

     if add_trap && mem.can_trap() {
@@ -237,7 +239,7 @@ pub fn mem_imm8_emit(
 ) {
     let (mem_insts, mem) = mem_finalize(mem, state, true, true, false, false);
     for inst in mem_insts.into_iter() {
-        inst.emit(sink, emit_info, state);
+        inst.emit(&[], sink, emit_info, state);
     }

     if add_trap && mem.can_trap() {
@@ -275,7 +277,7 @@ pub fn mem_imm16_emit(
 ) {
     let (mem_insts, mem) = mem_finalize(mem, state, true, false, false, false);
     for inst in mem_insts.into_iter() {
-        inst.emit(sink, emit_info, state);
+        inst.emit(&[], sink, emit_info, state);
     }

     if add_trap && mem.can_trap() {
@@ -300,17 +302,17 @@ pub fn mem_imm16_emit(
 // Instructions and subcomponents: emission

 fn machreg_to_gpr(m: Reg) -> u8 {
-    assert_eq!(m.get_class(), RegClass::I64);
-    u8::try_from(m.to_real_reg().get_hw_encoding()).unwrap()
+    assert_eq!(m.class(), RegClass::Int);
+    u8::try_from(m.to_real_reg().unwrap().hw_enc()).unwrap()
 }

 fn machreg_to_fpr(m: Reg) -> u8 {
-    assert_eq!(m.get_class(), RegClass::F64);
-    u8::try_from(m.to_real_reg().get_hw_encoding()).unwrap()
+    assert_eq!(m.class(), RegClass::Float);
+    u8::try_from(m.to_real_reg().unwrap().hw_enc()).unwrap()
 }

 fn machreg_to_gpr_or_fpr(m: Reg) -> u8 {
-    u8::try_from(m.to_real_reg().get_hw_encoding()).unwrap()
+    u8::try_from(m.to_real_reg().unwrap().hw_enc()).unwrap()
 }

 /// E-type instructions.
@@ -936,7 +938,15 @@ impl MachInstEmit for Inst {
     type State = EmitState;
     type Info = EmitInfo;

-    fn emit(&self, sink: &mut MachBuffer<Inst>, emit_info: &Self::Info, state: &mut EmitState) {
+    fn emit(
+        &self,
+        allocs: &[Allocation],
+        sink: &mut MachBuffer<Inst>,
+        emit_info: &Self::Info,
+        state: &mut EmitState,
+    ) {
+        let mut allocs = AllocationConsumer::new(allocs);
+
         // Verify that we can emit this Inst in the current ISA
         let matches_isa_flags = |iset_requirement: &InstructionSet| -> bool {
             match iset_requirement {
@@ -965,6 +975,10 @@ impl MachInstEmit for Inst {

         match self {
             &Inst::AluRRR { alu_op, rd, rn, rm } => {
+                let rd = allocs.next_writable(rd);
+                let rn = allocs.next(rn);
+                let rm = allocs.next(rm);
+
                 let (opcode, have_rr) = match alu_op {
                     ALUOp::Add32 => (0xb9f8, true), // ARK
                     ALUOp::Add64 => (0xb9e8, true), // AGRK
@@ -992,7 +1006,7 @@ impl MachInstEmit for Inst {
                 };
                 if have_rr && rd.to_reg() == rn {
                     let inst = Inst::AluRR { alu_op, rd, rm };
-                    inst.emit(sink, emit_info, state);
+                    inst.emit(&[], sink, emit_info, state);
                 } else {
                     put(sink, &enc_rrf_ab(opcode, rd.to_reg(), rn, rm, 0));
                 }
@@ -1003,9 +1017,12 @@ impl MachInstEmit for Inst {
                 rn,
                 imm,
             } => {
+                let rd = allocs.next_writable(rd);
+                let rn = allocs.next(rn);
+
                 if rd.to_reg() == rn {
                     let inst = Inst::AluRSImm16 { alu_op, rd, imm };
-                    inst.emit(sink, emit_info, state);
+                    inst.emit(&[], sink, emit_info, state);
                 } else {
                     let opcode = match alu_op {
                         ALUOp::Add32 => 0xecd8, // AHIK
@@ -1016,6 +1033,9 @@ impl MachInstEmit for Inst {
                 }
             }
             &Inst::AluRR { alu_op, rd, rm } => {
+                let rd = allocs.next_writable(rd);
+                let rm = allocs.next(rm);
+
                 let (opcode, is_rre) = match alu_op {
                     ALUOp::Add32 => (0x1a, false),  // AR
                     ALUOp::Add64 => (0xb908, true), // AGR
@@ -1051,6 +1071,9 @@ impl MachInstEmit for Inst {
                 rd,
                 ref mem,
             } => {
+                let rd = allocs.next_writable(rd);
+                let mem = mem.with_allocs(&mut allocs);
+
                 let (opcode_rx, opcode_rxy) = match alu_op {
                     ALUOp::Add32 => (Some(0x5a), Some(0xe35a)),      // A(Y)
                     ALUOp::Add32Ext16 => (Some(0x4a), Some(0xe34a)), // AH(Y)
@@ -1083,10 +1106,12 @@ impl MachInstEmit for Inst {
                 };
                 let rd = rd.to_reg();
                 mem_emit(
-                    rd, mem, opcode_rx, opcode_rxy, None, true, sink, emit_info, state,
+                    rd, &mem, opcode_rx, opcode_rxy, None, true, sink, emit_info, state,
                 );
             }
             &Inst::AluRSImm16 { alu_op, rd, imm } => {
+                let rd = allocs.next_writable(rd);
+
                 let opcode = match alu_op {
                     ALUOp::Add32 => 0xa7a, // AHI
                     ALUOp::Add64 => 0xa7b, // AGHI
@@ -1097,6 +1122,8 @@ impl MachInstEmit for Inst {
                 put(sink, &enc_ri_a(opcode, rd.to_reg(), imm as u16));
             }
             &Inst::AluRSImm32 { alu_op, rd, imm } => {
+                let rd = allocs.next_writable(rd);
+
                 let opcode = match alu_op {
                     ALUOp::Add32 => 0xc29, // AFI
                     ALUOp::Add64 => 0xc28, // AGFI
@@ -1107,6 +1134,8 @@ impl MachInstEmit for Inst {
                 put(sink, &enc_ril_a(opcode, rd.to_reg(), imm as u32));
             }
             &Inst::AluRUImm32 { alu_op, rd, imm } => {
+                let rd = allocs.next_writable(rd);
+
                 let opcode = match alu_op {
                     ALUOp::AddLogical32 => 0xc2b, // ALFI
                     ALUOp::AddLogical64 => 0xc2a, // ALGFI
@@ -1117,6 +1146,8 @@ impl MachInstEmit for Inst {
                 put(sink, &enc_ril_a(opcode, rd.to_reg(), imm));
             }
             &Inst::AluRUImm16Shifted { alu_op, rd, imm } => {
+                let rd = allocs.next_writable(rd);
+
                 let opcode = match (alu_op, imm.shift) {
                     (ALUOp::And32, 0) => 0xa57, // NILL
                     (ALUOp::And32, 1) => 0xa56, // NILH
@@ -1135,6 +1166,8 @@ impl MachInstEmit for Inst {
                 put(sink, &enc_ri_a(opcode, rd.to_reg(), imm.bits));
             }
             &Inst::AluRUImm32Shifted { alu_op, rd, imm } => {
+                let rd = allocs.next_writable(rd);
+
                 let opcode = match (alu_op, imm.shift) {
                     (ALUOp::And32, 0) => 0xc0b, // NILF
                     (ALUOp::And64, 0) => 0xc0b, // NILF
@@ -1151,38 +1184,53 @@ impl MachInstEmit for Inst {
             }

             &Inst::SMulWide { rn, rm } => {
+                let rn = allocs.next(rn);
+                let rm = allocs.next(rm);
+
                 let opcode = 0xb9ec; // MGRK
                 put(sink, &enc_rrf_ab(opcode, gpr(0), rn, rm, 0));
             }
             &Inst::UMulWide { rn } => {
+                let rn = allocs.next(rn);
+
                 let opcode = 0xb986; // MLGR
                 put(sink, &enc_rre(opcode, gpr(0), rn));
             }
             &Inst::SDivMod32 { rn } => {
+                let rn = allocs.next(rn);
+
                 let opcode = 0xb91d; // DSGFR
                 let srcloc = state.cur_srcloc();
                 let trap_code = TrapCode::IntegerDivisionByZero;
                 put_with_trap(sink, &enc_rre(opcode, gpr(0), rn), srcloc, trap_code);
             }
             &Inst::SDivMod64 { rn } => {
+                let rn = allocs.next(rn);
+
                 let opcode = 0xb90d; // DSGR
                 let srcloc = state.cur_srcloc();
                 let trap_code = TrapCode::IntegerDivisionByZero;
                 put_with_trap(sink, &enc_rre(opcode, gpr(0), rn), srcloc, trap_code);
             }
             &Inst::UDivMod32 { rn } => {
+                let rn = allocs.next(rn);
+
                 let opcode = 0xb997; // DLR
                 let srcloc = state.cur_srcloc();
                 let trap_code = TrapCode::IntegerDivisionByZero;
                 put_with_trap(sink, &enc_rre(opcode, gpr(0), rn), srcloc, trap_code);
             }
             &Inst::UDivMod64 { rn } => {
+                let rn = allocs.next(rn);
+
                 let opcode = 0xb987; // DLGR
                 let srcloc = state.cur_srcloc();
                 let trap_code = TrapCode::IntegerDivisionByZero;
                 put_with_trap(sink, &enc_rre(opcode, gpr(0), rn), srcloc, trap_code);
             }
             &Inst::Flogr { rn } => {
+                let rn = allocs.next(rn);
+
                 let opcode = 0xb983; // FLOGR
                 put(sink, &enc_rre(opcode, gpr(0), rn));
             }
@@ -1194,6 +1242,10 @@ impl MachInstEmit for Inst {
                 shift_imm,
                 shift_reg,
             } => {
+                let rd = allocs.next_writable(rd);
+                let rn = allocs.next(rn);
+                let shift_reg = allocs.next(shift_reg);
+
                 let opcode = match shift_op {
                     ShiftOp::RotL32 => 0xeb1d, // RLL
                     ShiftOp::RotL64 => 0xeb1c, // RLLG
@@ -1218,6 +1270,9 @@ impl MachInstEmit for Inst {
                 end_bit,
                 rotate_amt,
             } => {
+                let rd = allocs.next_writable(rd);
+                let rn = allocs.next(rn);
+
                 let opcode = match op {
                     RxSBGOp::Insert => 0xec59, // RISBGN
                     RxSBGOp::And => 0xec54,    // RNSBG
@@ -1245,6 +1300,9 @@ impl MachInstEmit for Inst {
                 end_bit,
                 rotate_amt,
             } => {
+                let rd = allocs.next(rd);
+                let rn = allocs.next(rn);
+
                 let opcode = match op {
                     RxSBGOp::And => 0xec54, // RNSBG
                     RxSBGOp::Or => 0xec56,  // ROSBG
@@ -1265,6 +1323,9 @@ impl MachInstEmit for Inst {
             }

             &Inst::UnaryRR { op, rd, rn } => {
+                let rd = allocs.next_writable(rd);
+                let rn = allocs.next(rn);
+
                 match op {
                     UnaryOp::Abs32 => {
                         let opcode = 0x10; // LPR
@@ -1316,6 +1377,9 @@ impl MachInstEmit for Inst {
                 from_bits,
                 to_bits,
             } => {
+                let rd = allocs.next_writable(rd);
+                let rn = allocs.next(rn);
+
                 let opcode = match (signed, from_bits, to_bits) {
                     (_, 1, 32) => 0xb926, // LBR
                     (_, 1, 64) => 0xb906, // LGBR
@@ -1338,6 +1402,9 @@ impl MachInstEmit for Inst {
             }

             &Inst::CmpRR { op, rn, rm } => {
+                let rn = allocs.next(rn);
+                let rm = allocs.next(rm);
+
                 let (opcode, is_rre) = match op {
                     CmpOp::CmpS32 => (0x19, false),  // CR
                     CmpOp::CmpS64 => (0xb920, true), // CGR
@@ -1354,6 +1421,9 @@ impl MachInstEmit for Inst {
                 }
             }
             &Inst::CmpRX { op, rn, ref mem } => {
+                let rn = allocs.next(rn);
+                let mem = mem.with_allocs(&mut allocs);
+
                 let (opcode_rx, opcode_rxy, opcode_ril) = match op {
                     CmpOp::CmpS32 => (Some(0x59), Some(0xe359), Some(0xc6d)), // C(Y), CRL
                     CmpOp::CmpS32Ext16 => (Some(0x49), Some(0xe379), Some(0xc65)), // CH(Y), CHRL
@@ -1367,10 +1437,12 @@ impl MachInstEmit for Inst {
                     CmpOp::CmpL64Ext32 => (None, Some(0xe331), Some(0xc6e)), // CLGF, CLGFRL
                 };
                 mem_emit(
-                    rn, mem, opcode_rx, opcode_rxy, opcode_ril, true, sink, emit_info, state,
+                    rn, &mem, opcode_rx, opcode_rxy, opcode_ril, true, sink, emit_info, state,
                 );
             }
             &Inst::CmpRSImm16 { op, rn, imm } => {
+                let rn = allocs.next(rn);
+
                 let opcode = match op {
                     CmpOp::CmpS32 => 0xa7e, // CHI
                     CmpOp::CmpS64 => 0xa7f, // CGHI
@@ -1379,6 +1451,8 @@ impl MachInstEmit for Inst {
                 put(sink, &enc_ri_a(opcode, rn, imm as u16));
             }
             &Inst::CmpRSImm32 { op, rn, imm } => {
+                let rn = allocs.next(rn);
+
                 let opcode = match op {
                     CmpOp::CmpS32 => 0xc2d, // CFI
                     CmpOp::CmpS64 => 0xc2c, // CGFI
@@ -1387,6 +1461,8 @@ impl MachInstEmit for Inst {
                 put(sink, &enc_ril_a(opcode, rn, imm as u32));
             }
             &Inst::CmpRUImm32 { op, rn, imm } => {
+                let rn = allocs.next(rn);
+
                 let opcode = match op {
                     CmpOp::CmpL32 => 0xc2f, // CLFI
                     CmpOp::CmpL64 => 0xc2e, // CLGFI
@@ -1401,6 +1477,9 @@ impl MachInstEmit for Inst {
                 cond,
                 trap_code,
             } => {
+                let rn = allocs.next(rn);
+                let rm = allocs.next(rm);
+
                 let opcode = match op {
                     CmpOp::CmpS32 => 0xb972, // CRT
                     CmpOp::CmpS64 => 0xb960, // CGRT
@@ -1423,6 +1502,8 @@ impl MachInstEmit for Inst {
                 cond,
                 trap_code,
             } => {
+                let rn = allocs.next(rn);
+
                 let opcode = match op {
                     CmpOp::CmpS32 => 0xec72, // CIT
                     CmpOp::CmpS64 => 0xec70, // CGIT
@@ -1443,6 +1524,8 @@ impl MachInstEmit for Inst {
                 cond,
                 trap_code,
             } => {
+                let rn = allocs.next(rn);
+
                 let opcode = match op {
                     CmpOp::CmpL32 => 0xec73, // CLFIT
                     CmpOp::CmpL64 => 0xec71, // CLGIT
@@ -1463,6 +1546,10 @@ impl MachInstEmit for Inst {
                 rn,
                 ref mem,
             } => {
+                let rd = allocs.next_writable(rd);
+                let rn = allocs.next(rn);
+                let mem = mem.with_allocs(&mut allocs);
+
                 let opcode = match alu_op {
                     ALUOp::Add32 => 0xebf8, // LAA
                     ALUOp::Add64 => 0xebe8, // LAAG
@@ -1481,7 +1568,7 @@ impl MachInstEmit for Inst {
                 mem_rs_emit(
                     rd,
                     rn,
-                    mem,
+                    &mem,
                     None,
                     Some(opcode),
                     true,
@@ -1500,6 +1587,8 @@ impl MachInstEmit for Inst {

                 sink.bind_label(loop_label);
                 for inst in (&body).into_iter() {
+                    let op_count = count_operands(inst);
+                    let sub_allocs = allocs.next_n(op_count);
                     match &inst {
                         // Replace a CondBreak with a branch to done_label.
                         &Inst::CondBreak { cond } => {
@@ -1507,9 +1596,9 @@ impl MachInstEmit for Inst {
                                 target: done_label,
                                 cond: *cond,
                             };
-                            inst.emit(sink, emit_info, state);
+                            inst.emit(&sub_allocs[..], sink, emit_info, state);
                         }
-                        _ => inst.emit(sink, emit_info, state),
+                        _ => inst.emit(&sub_allocs[..], sink, emit_info, state),
                     };
                 }

@@ -1517,13 +1606,17 @@ impl MachInstEmit for Inst {
                     target: loop_label,
                     cond,
                 };
-                inst.emit(sink, emit_info, state);
+                inst.emit(&[], sink, emit_info, state);

                 // Emit label at the end of the loop.
                 sink.bind_label(done_label);
             }
             &Inst::CondBreak { .. } => unreachable!(), // Only valid inside a Loop.
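The `Loop` pseudo-instruction above hands each nested instruction its own slice of the flat allocation array, sized by `count_operands`. A small sketch of that slicing step, with invented stand-in names (`split_allocs` is not a cranelift function):

/// Illustrative: hand out one slice of a flat allocation array per nested
/// instruction, sized by that instruction's operand count (cf. the
/// `count_operands` / `allocs.next_n(op_count)` pairing above).
fn split_allocs<'a>(mut allocs: &'a [u8], op_counts: &[usize]) -> Vec<&'a [u8]> {
    let mut out = Vec::new();
    for &n in op_counts {
        let (head, tail) = allocs.split_at(n);
        out.push(head);
        allocs = tail;
    }
    out
}

fn main() {
    // A loop body of three instructions with 3, 1, and 2 operands.
    let allocs = [7, 8, 9, 2, 4, 5];
    let per_inst = split_allocs(&allocs, &[3, 1, 2]);
    assert_eq!(per_inst, vec![&[7, 8, 9][..], &[2][..], &[4, 5][..]]);
    println!("ok");
}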
             &Inst::AtomicCas32 { rd, rn, ref mem } | &Inst::AtomicCas64 { rd, rn, ref mem } => {
+                let rd = allocs.next_writable(rd);
+                let rn = allocs.next(rn);
+                let mem = mem.with_allocs(&mut allocs);
+
                 let (opcode_rs, opcode_rsy) = match self {
                     &Inst::AtomicCas32 { .. } => (Some(0xba), Some(0xeb14)), // CS(Y)
                     &Inst::AtomicCas64 { .. } => (None, Some(0xeb30)),       // CSG
@@ -1532,7 +1625,7 @@ impl MachInstEmit for Inst {
                 let rd = rd.to_reg();
                 mem_rs_emit(
-                    rd, rn, mem, opcode_rs, opcode_rsy, true, sink, emit_info, state,
+                    rd, rn, &mem, opcode_rs, opcode_rsy, true, sink, emit_info, state,
                 );
             }
             &Inst::Fence => {
@@ -1556,6 +1649,9 @@ impl MachInstEmit for Inst {
             | &Inst::LoadRev64 { rd, ref mem }
             | &Inst::FpuLoad32 { rd, ref mem }
             | &Inst::FpuLoad64 { rd, ref mem } => {
+                let rd = allocs.next_writable(rd);
+                let mem = mem.with_allocs(&mut allocs);
+
                 let (opcode_rx, opcode_rxy, opcode_ril) = match self {
                     &Inst::Load32 { .. } => (Some(0x58), Some(0xe358), Some(0xc4d)), // L(Y), LRL
                     &Inst::Load32ZExt8 { .. } => (None, Some(0xe394), None),         // LLC
@@ -1578,19 +1674,22 @@ impl MachInstEmit for Inst {
                 };
                 let rd = rd.to_reg();
                 mem_emit(
-                    rd, mem, opcode_rx, opcode_rxy, opcode_ril, true, sink, emit_info, state,
+                    rd, &mem, opcode_rx, opcode_rxy, opcode_ril, true, sink, emit_info, state,
                 );
             }

             &Inst::FpuLoadRev32 { rd, ref mem } | &Inst::FpuLoadRev64 { rd, ref mem } => {
+                let rd = allocs.next_writable(rd);
+                let mem = mem.with_allocs(&mut allocs);
+
                 let opcode = match self {
                     &Inst::FpuLoadRev32 { .. } => 0xe603, // VLEBRF
                     &Inst::FpuLoadRev64 { .. } => 0xe602, // VLEBRG
                     _ => unreachable!(),
                 };
-                let (mem_insts, mem) = mem_finalize(mem, state, true, false, false, true);
+                let (mem_insts, mem) = mem_finalize(&mem, state, true, false, false, true);
                 for inst in mem_insts.into_iter() {
-                    inst.emit(sink, emit_info, state);
+                    inst.emit(&[], sink, emit_info, state);
                 }

                 let srcloc = state.cur_srcloc();
@@ -1620,6 +1719,9 @@ impl MachInstEmit for Inst {
             | &Inst::StoreRev64 { rd, ref mem }
             | &Inst::FpuStore32 { rd, ref mem }
             | &Inst::FpuStore64 { rd, ref mem } => {
+                let rd = allocs.next(rd);
+                let mem = mem.with_allocs(&mut allocs);
+
                 let (opcode_rx, opcode_rxy, opcode_ril) = match self {
                     &Inst::Store8 { .. } => (Some(0x42), Some(0xe372), None), // STC(Y)
                     &Inst::Store16 { .. } => (Some(0x40), Some(0xe370), Some(0xc47)), // STH(Y), STHRL
@@ -1633,37 +1735,44 @@ impl MachInstEmit for Inst {
                     _ => unreachable!(),
                 };
                 mem_emit(
-                    rd, mem, opcode_rx, opcode_rxy, opcode_ril, true, sink, emit_info, state,
+                    rd, &mem, opcode_rx, opcode_rxy, opcode_ril, true, sink, emit_info, state,
                 );
             }
             &Inst::StoreImm8 { imm, ref mem } => {
+                let mem = mem.with_allocs(&mut allocs);
+
                 let opcode_si = 0x92;    // MVI
                 let opcode_siy = 0xeb52; // MVIY
                 mem_imm8_emit(
-                    imm, mem, opcode_si, opcode_siy, true, sink, emit_info, state,
+                    imm, &mem, opcode_si, opcode_siy, true, sink, emit_info, state,
                 );
             }
             &Inst::StoreImm16 { imm, ref mem }
             | &Inst::StoreImm32SExt16 { imm, ref mem }
             | &Inst::StoreImm64SExt16 { imm, ref mem } => {
+                let mem = mem.with_allocs(&mut allocs);
+
                 let opcode = match self {
                     &Inst::StoreImm16 { .. } => 0xe544,       // MVHHI
                     &Inst::StoreImm32SExt16 { .. } => 0xe54c, // MVHI
                     &Inst::StoreImm64SExt16 { .. } => 0xe548, // MVGHI
                     _ => unreachable!(),
                 };
-                mem_imm16_emit(imm, mem, opcode, true, sink, emit_info, state);
+                mem_imm16_emit(imm, &mem, opcode, true, sink, emit_info, state);
             }

             &Inst::FpuStoreRev32 { rd, ref mem } | &Inst::FpuStoreRev64 { rd, ref mem } => {
+                let rd = allocs.next(rd);
+                let mem = mem.with_allocs(&mut allocs);
+
                 let opcode = match self {
                     &Inst::FpuStoreRev32 { .. } => 0xe60b, // VSTEBRF
                     &Inst::FpuStoreRev64 { .. } => 0xe60a, // VSTEBRG
                     _ => unreachable!(),
                 };
-                let (mem_insts, mem) = mem_finalize(mem, state, true, false, false, true);
+                let (mem_insts, mem) = mem_finalize(&mem, state, true, false, false, true);
                 for inst in mem_insts.into_iter() {
-                    inst.emit(sink, emit_info, state);
+                    inst.emit(&[], sink, emit_info, state);
                 }

                 let srcloc = state.cur_srcloc();
@@ -1682,6 +1791,8 @@ impl MachInstEmit for Inst {
             }

             &Inst::LoadMultiple64 { rt, rt2, ref mem } => {
+                let mem = mem.with_allocs(&mut allocs);
+
                 let opcode = 0xeb04; // LMG
                 let rt = rt.to_reg();
                 let rt2 = rt2.to_reg();
@@ -1698,6 +1809,8 @@ impl MachInstEmit for Inst {
                 );
             }
             &Inst::StoreMultiple64 { rt, rt2, ref mem } => {
+                let mem = mem.with_allocs(&mut allocs);
+
                 let opcode = 0xeb24; // STMG
                 mem_rs_emit(
                     rt,
@@ -1713,48 +1826,73 @@ impl MachInstEmit for Inst {
             }

             &Inst::LoadAddr { rd, ref mem } => {
+                let rd = allocs.next_writable(rd);
+                let mem = mem.with_allocs(&mut allocs);
+
                 let opcode_rx = Some(0x41);    // LA
                 let opcode_rxy = Some(0xe371); // LAY
                 let opcode_ril = Some(0xc00);  // LARL
                 let rd = rd.to_reg();
                 mem_emit(
-                    rd, mem, opcode_rx, opcode_rxy, opcode_ril, false, sink, emit_info, state,
+                    rd, &mem, opcode_rx, opcode_rxy, opcode_ril, false, sink, emit_info, state,
                 );
             }

             &Inst::Mov64 { rd, rm } => {
+                let rd = allocs.next_writable(rd);
+                let rm = allocs.next(rm);
+
                 let opcode = 0xb904; // LGR
                 put(sink, &enc_rre(opcode, rd.to_reg(), rm));
             }
             &Inst::Mov32 { rd, rm } => {
+                let rd = allocs.next_writable(rd);
+                let rm = allocs.next(rm);
+
                 let opcode = 0x18; // LR
                 put(sink, &enc_rr(opcode, rd.to_reg(), rm));
             }
             &Inst::Mov32Imm { rd, imm } => {
+                let rd = allocs.next_writable(rd);
+
                 let opcode = 0xc09; // IILF
                 put(sink, &enc_ril_a(opcode, rd.to_reg(), imm));
             }
             &Inst::Mov32SImm16 { rd, imm } => {
+                let rd = allocs.next_writable(rd);
+
                 let opcode = 0xa78; // LHI
                 put(sink, &enc_ri_a(opcode, rd.to_reg(), imm as u16));
             }
             &Inst::Mov64SImm16 { rd, imm } => {
+                let rd = allocs.next_writable(rd);
+
                 let opcode = 0xa79; // LGHI
                 put(sink, &enc_ri_a(opcode, rd.to_reg(), imm as u16));
             }
             &Inst::Mov64SImm32 { rd, imm } => {
+                let rd = allocs.next_writable(rd);
+
                 let opcode = 0xc01; // LGFI
                 put(sink, &enc_ril_a(opcode, rd.to_reg(), imm as u32));
             }
             &Inst::CMov32 { rd, cond, rm } => {
+                let rd = allocs.next_writable(rd);
+                let rm = allocs.next(rm);
+
                 let opcode = 0xb9f2; // LOCR
                 put(sink, &enc_rrf_cde(opcode, rd.to_reg(), rm, cond.bits(), 0));
             }
             &Inst::CMov64 { rd, cond, rm } => {
+                let rd = allocs.next_writable(rd);
+                let rm = allocs.next(rm);
+
                 let opcode = 0xb9e2; // LOCGR
                 put(sink, &enc_rrf_cde(opcode, rd.to_reg(), rm, cond.bits(), 0));
             }
             &Inst::CMov32SImm16 { rd, cond, imm } => {
+                let rd = allocs.next_writable(rd);
+
                 let opcode = 0xec42; // LOCHI
                 put(
                     sink,
@@ -1762,6 +1900,8 @@ impl MachInstEmit for Inst {
                 );
             }
             &Inst::CMov64SImm16 { rd, cond, imm } => {
+                let rd = allocs.next_writable(rd);
+
                 let opcode = 0xec46; // LOCGHI
                 put(
                     sink,
@@ -1769,6 +1909,8 @@ impl MachInstEmit for Inst {
                 );
             }
             &Inst::Mov64UImm16Shifted { rd, imm } => {
+                let rd = allocs.next_writable(rd);
+
                 let opcode = match imm.shift {
                     0 => 0xa5f, // LLILL
                     1 => 0xa5e, // LLILH
@@ -1779,6 +1921,8 @@ impl MachInstEmit for Inst {
                 put(sink, &enc_ri_a(opcode, rd.to_reg(), imm.bits));
             }
             &Inst::Mov64UImm32Shifted { rd, imm } => {
+                let rd = allocs.next_writable(rd);
+
                 let opcode = match imm.shift {
                     0 => 0xc0f, // LLILF
                     1 => 0xc0e, // LLIHF
@@ -1787,6 +1931,8 @@ impl MachInstEmit for Inst {
                 put(sink, &enc_ril_a(opcode, rd.to_reg(), imm.bits));
             }
             &Inst::Insert64UImm16Shifted { rd, imm } => {
+                let rd = allocs.next_writable(rd);
+
                 let opcode = match imm.shift {
                     0 => 0xa53, // IILL
                     1 => 0xa52, // IILH
@@ -1797,6 +1943,8 @@ impl MachInstEmit for Inst {
                 put(sink, &enc_ri_a(opcode, rd.to_reg(), imm.bits));
             }
             &Inst::Insert64UImm32Shifted { rd, imm } => {
+                let rd = allocs.next_writable(rd);
+
                 let opcode = match imm.shift {
                     0 => 0xc09, // IILF
                     1 => 0xc08, // IIHF
@@ -1809,6 +1957,8 @@ impl MachInstEmit for Inst {
                 ref name,
                 offset,
             } => {
+                let rd = allocs.next_writable(rd);
+
                 let opcode = 0xa75; // BRAS
                 let srcloc = state.cur_srcloc();
                 let reg = writable_spilltmp_reg().to_reg();
@@ -1823,38 +1973,58 @@ impl MachInstEmit for Inst {
                     rd,
                     mem: MemArg::reg(reg, MemFlags::trusted()),
                 };
-                inst.emit(sink, emit_info, state);
+                inst.emit(&[], sink, emit_info, state);
             }

             &Inst::FpuMove32 { rd, rn } => {
+                let rd = allocs.next_writable(rd);
+                let rn = allocs.next(rn);
+
                 let opcode = 0x38; // LER
                 put(sink, &enc_rr(opcode, rd.to_reg(), rn));
             }
             &Inst::FpuMove64 { rd, rn } => {
+                let rd = allocs.next_writable(rd);
+                let rn = allocs.next(rn);
+
                 let opcode = 0x28; // LDR
                 put(sink, &enc_rr(opcode, rd.to_reg(), rn));
             }
             &Inst::FpuCMov32 { rd, cond, rm } => {
+                let rd = allocs.next_writable(rd);
+                let rm = allocs.next(rm);
+
                 let opcode = 0xa74; // BCR
                 put(sink, &enc_ri_c(opcode, cond.invert().bits(), 4 + 2));
                 let opcode = 0x38; // LER
                 put(sink, &enc_rr(opcode, rd.to_reg(), rm));
             }
             &Inst::FpuCMov64 { rd, cond, rm } => {
+                let rd = allocs.next_writable(rd);
+                let rm = allocs.next(rm);
+
                 let opcode = 0xa74; // BCR
                 put(sink, &enc_ri_c(opcode, cond.invert().bits(), 4 + 2));
                 let opcode = 0x28; // LDR
                 put(sink, &enc_rr(opcode, rd.to_reg(), rm));
             }
             &Inst::MovToFpr { rd, rn } => {
+                let rd = allocs.next_writable(rd);
+                let rn = allocs.next(rn);
+
                 let opcode = 0xb3c1; // LDGR
                 put(sink, &enc_rre(opcode, rd.to_reg(), rn));
             }
             &Inst::MovFromFpr { rd, rn } => {
+                let rd = allocs.next_writable(rd);
+                let rn = allocs.next(rn);
+
                 let opcode = 0xb3cd; // LGDR
                 put(sink, &enc_rre(opcode, rd.to_reg(), rn));
             }
             &Inst::LoadFpuConst32 { rd, const_data } => {
+                let rd = allocs.next_writable(rd);
+
                 let opcode = 0xa75; // BRAS
                 let reg = writable_spilltmp_reg().to_reg();
                 put(sink, &enc_ri_b(opcode, reg, 8));
@@ -1863,9 +2033,11 @@ impl MachInstEmit for Inst {
                     rd,
                     mem: MemArg::reg(reg, MemFlags::trusted()),
                 };
-                inst.emit(sink, emit_info, state);
+                inst.emit(&[], sink, emit_info, state);
             }
             &Inst::LoadFpuConst64 { rd, const_data } => {
+                let rd = allocs.next_writable(rd);
+
                 let opcode = 0xa75; // BRAS
                 let reg = writable_spilltmp_reg().to_reg();
                 put(sink, &enc_ri_b(opcode, reg, 12));
@@ -1874,14 +2046,21 @@ impl MachInstEmit for Inst {
                     rd,
                     mem: MemArg::reg(reg, MemFlags::trusted()),
                 };
-                inst.emit(sink, emit_info, state);
+                inst.emit(&[], sink, emit_info, state);
             }

             &Inst::FpuCopysign { rd, rn, rm } => {
+                let rd = allocs.next_writable(rd);
+                let rn = allocs.next(rn);
+                let rm = allocs.next(rm);
+
                 let opcode = 0xb372; // CPSDR
                 put(sink, &enc_rrf_ab(opcode, rd.to_reg(), rn, rm, 0));
             }
             &Inst::FpuRR { fpu_op, rd, rn } => {
+                let rd = allocs.next_writable(rd);
+                let rn = allocs.next(rn);
+
                 let opcode = match fpu_op {
                     FPUOp1::Abs32 => 0xb300, // LPEBR
                     FPUOp1::Abs64 => 0xb310, // LPDBR
@@ -1897,6 +2076,9 @@ impl MachInstEmit for Inst {
                 put(sink, &enc_rre(opcode, rd.to_reg(), rn));
             }
             &Inst::FpuRRR { fpu_op, rd, rm } => {
+                let rd = allocs.next_writable(rd);
+                let rm = allocs.next(rm);
+
                 let opcode = match fpu_op {
                     FPUOp2::Add32 => 0xb30a, // AEBR
                     FPUOp2::Add64 => 0xb31a, // ADBR
@@ -1911,6 +2093,10 @@ impl MachInstEmit for Inst {
                 put(sink, &enc_rre(opcode, rd.to_reg(), rm));
             }
             &Inst::FpuRRRR { fpu_op, rd, rn, rm } => {
+                let rd = allocs.next_writable(rd);
+                let rn = allocs.next(rn);
+                let rm = allocs.next(rm);
+
                 let opcode = match fpu_op {
                     FPUOp3::MAdd32 => 0xb30e, // MAEBR
                     FPUOp3::MAdd64 => 0xb31e, // MADBR
@@ -1920,6 +2106,9 @@ impl MachInstEmit for Inst {
                 put(sink, &enc_rrd(opcode, rd.to_reg(), rm, rn));
             }
             &Inst::FpuToInt { op, rd, rn } => {
+                let rd = allocs.next_writable(rd);
+                let rn = allocs.next(rn);
+
                 let opcode = match op {
                     FpuToIntOp::F32ToI32 => 0xb398, // CFEBRA
                     FpuToIntOp::F32ToU32 => 0xb39c, // CLFEBR
@@ -1933,6 +2122,9 @@ impl MachInstEmit for Inst {
                 put(sink, &enc_rrf_cde(opcode, rd.to_reg(), rn, 5, 0));
             }
             &Inst::IntToFpu { op, rd, rn } => {
+                let rd = allocs.next_writable(rd);
+                let rn = allocs.next(rn);
+
                 let opcode = match op {
                     IntToFpuOp::I32ToF32 => 0xb394, // CEFBRA
                     IntToFpuOp::U32ToF32 => 0xb390, // CELFBR
@@ -1946,6 +2138,9 @@ impl MachInstEmit for Inst {
                 put(sink, &enc_rrf_cde(opcode, rd.to_reg(), rn, 0, 0));
             }
             &Inst::FpuRound { op, rd, rn } => {
+                let rd = allocs.next_writable(rd);
+                let rn = allocs.next(rn);
+
                 let (opcode, m3) = match op {
                     FpuRoundMode::Minus32 => (0xb357, 7), // FIEBR
                     FpuRoundMode::Minus64 => (0xb35f, 7), // FIDBR
@@ -1959,6 +2154,10 @@ impl MachInstEmit for Inst {
                 put(sink, &enc_rrf_cde(opcode, rd.to_reg(), rn, m3, 0));
             }
             &Inst::FpuVecRRR { fpu_op, rd, rn, rm } => {
+                let rd = allocs.next_writable(rd);
+                let rn = allocs.next(rn);
+                let rm = allocs.next(rm);
+
                 let (opcode, m4) = match fpu_op {
                     FPUOp2::Max32 => (0xe7ef, 2), // VFMAX
                     FPUOp2::Max64 => (0xe7ef, 3), // VFMAX
@@ -1969,15 +2168,23 @@ impl MachInstEmit for Inst {
                 put(sink, &enc_vrr(opcode, rd.to_reg(), rn, rm, m4, 8, 1));
             }
             &Inst::FpuCmp32 { rn, rm } => {
+                let rn = allocs.next(rn);
+                let rm = allocs.next(rm);
+
                 let opcode = 0xb309; // CEBR
                 put(sink, &enc_rre(opcode, rn, rm));
             }
             &Inst::FpuCmp64 { rn, rm } => {
+                let rn = allocs.next(rn);
+                let rm = allocs.next(rm);
+
                 let opcode = 0xb319; // CDBR
                 put(sink, &enc_rre(opcode, rn, rm));
             }

             &Inst::Call { link, ref info } => {
+                let link = allocs.next_writable(link);
+
                 let opcode = 0xc05; // BRASL
                 let reloc = Reloc::S390xPCRel32Dbl;
                 let srcloc = state.cur_srcloc();
@@ -1998,17 +2205,22 @@ impl MachInstEmit for Inst {
                 }
             }
             &Inst::CallInd { link, ref info } => {
+                let link = allocs.next_writable(link);
+                let rn = allocs.next(info.rn);
+
                 let opcode = 0x0d; // BASR
                 let srcloc = state.cur_srcloc();
                 if let Some(s) = state.take_stack_map() {
                     sink.add_stack_map(StackMapExtent::UpcomingBytes(2), s);
                 }
-                put(sink, &enc_rr(opcode, link.to_reg(), info.rn));
+                put(sink, &enc_rr(opcode, link.to_reg(), rn));
                 if info.opcode.is_call() {
                     sink.add_call_site(srcloc, info.opcode);
                 }
             }
-            &Inst::Ret { link } => {
+            &Inst::Ret { link, .. } => {
+                let link = allocs.next(link);
+
                 let opcode = 0x07; // BCR
                 put(sink, &enc_rr(opcode, gpr(15), link));
             }
@@ -2025,6 +2237,8 @@ impl MachInstEmit for Inst {
                 put(sink, &enc_ril_c(opcode, 15, 0));
             }
             &Inst::IndirectBr { rn, .. } => {
+                let rn = allocs.next(rn);
+
                 let opcode = 0x07; // BCR
                 put(sink, &enc_rr(opcode, gpr(15), rn));
             }
@@ -2079,6 +2293,8 @@ impl MachInstEmit for Inst {
                 put_with_trap(sink, &enc_e(0x0000), srcloc, trap_code);
             }
             &Inst::JTSequence { ridx, ref targets } => {
+                let ridx = allocs.next(ridx);
+
                 let table_label = sink.get_label();

                 // This sequence is *one* instruction in the vcode, and is expanded only here at
@@ -2093,7 +2309,7 @@ impl MachInstEmit for Inst {
                         target: table_label,
                     },
                 };
-                inst.emit(sink, emit_info, state);
+                inst.emit(&[], sink, emit_info, state);

                 // Set temp to target address by adding the value of the jump table entry.
                 let inst = Inst::AluRX {
@@ -2101,7 +2317,7 @@ impl MachInstEmit for Inst {
                     rd: rtmp,
                     mem: MemArg::reg_plus_reg(rtmp.to_reg(), ridx, MemFlags::trusted()),
                 };
-                inst.emit(sink, emit_info, state);
+                inst.emit(&[], sink, emit_info, state);

                 // Branch to computed address. (`targets` here is only used for successor queries
                 // and is not needed for emission.)
@@ -2109,7 +2325,7 @@ impl MachInstEmit for Inst {
                     rn: rtmp.to_reg(),
                     targets: vec![],
                 };
-                inst.emit(sink, emit_info, state);
+                inst.emit(&[], sink, emit_info, state);

                 // Emit jump table (table of 32-bit offsets).
                 // The first entry is the default target, which is not emitted
@@ -2138,13 +2354,11 @@ impl MachInstEmit for Inst {
                 state.virtual_sp_offset += offset;
             }

-            &Inst::ValueLabelMarker { .. } => {
-                // Nothing; this is only used to compute debug info.
-            }
-
             &Inst::Unwind { ref inst } => {
                 sink.add_unwind(inst.clone());
             }
+
+            &Inst::DummyUse { .. } => {}
         }

         let end_off = sink.cur_offset();
@@ -2153,7 +2367,8 @@ impl MachInstEmit for Inst {
         state.clear_post_insn();
     }

-    fn pretty_print(&self, mb_rru: Option<&RealRegUniverse>, state: &mut EmitState) -> String {
-        self.print_with_state(mb_rru, state)
+    fn pretty_print_inst(&self, allocs: &[Allocation], state: &mut EmitState) -> String {
+        let mut allocs = AllocationConsumer::new(allocs);
+        self.print_with_state(state, &mut allocs)
     }
 }
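The `emit`/`pretty_print_inst` signatures above rely on one invariant: allocations arrive in exactly the order the operand collector reported operands. A toy illustration of the two passes agreeing on order; both sides are stand-ins, not the real cranelift/regalloc2 types:

/// Illustrative collection pass: record operand names in visit order.
struct Collector(Vec<&'static str>);
impl Collector {
    fn reg_def(&mut self, name: &'static str) { self.0.push(name); }
    fn reg_use(&mut self, name: &'static str) { self.0.push(name); }
}

fn main() {
    // Collection pass: visit operands of a hypothetical `add rd, rn, rm`.
    let mut c = Collector(Vec::new());
    c.reg_def("rd");
    c.reg_use("rn");
    c.reg_use("rm");

    // Emission pass: pull allocations in the same order.
    let allocs = ["r2", "r3", "r4"]; // one final register per collected operand
    assert_eq!(c.0.len(), allocs.len());
    for (operand, alloc) in c.0.iter().zip(allocs.iter()) {
        println!("{} -> {}", operand, alloc);
    }
}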
diff --git a/cranelift/codegen/src/isa/s390x/inst/emit_tests.rs b/cranelift/codegen/src/isa/s390x/inst/emit_tests.rs
index ed2310e288..225ea132dd 100644
--- a/cranelift/codegen/src/isa/s390x/inst/emit_tests.rs
+++ b/cranelift/codegen/src/isa/s390x/inst/emit_tests.rs
@@ -1585,7 +1585,7 @@ fn test_s390x_binemit() {
             op: CmpOp::CmpS32,
             rn: gpr(1),
             mem: MemArg::Label {
-                target: MachLabel::from_block(1),
+                target: MachLabel::from_block(BlockIndex::new(1)),
             },
         },
         "C61D00000003",
@@ -1624,7 +1624,7 @@ fn test_s390x_binemit() {
             op: CmpOp::CmpS32Ext16,
             rn: gpr(1),
             mem: MemArg::Label {
-                target: MachLabel::from_block(1),
+                target: MachLabel::from_block(BlockIndex::new(1)),
             },
         },
         "C61500000003",
@@ -1649,7 +1649,7 @@ fn test_s390x_binemit() {
             op: CmpOp::CmpS64,
             rn: gpr(1),
             mem: MemArg::Label {
-                target: MachLabel::from_block(1),
+                target: MachLabel::from_block(BlockIndex::new(1)),
             },
         },
         "C61800000003",
@@ -1674,7 +1674,7 @@ fn test_s390x_binemit() {
             op: CmpOp::CmpS64Ext16,
             rn: gpr(1),
             mem: MemArg::Label {
-                target: MachLabel::from_block(1),
+                target: MachLabel::from_block(BlockIndex::new(1)),
             },
         },
         "C61400000003",
@@ -1699,7 +1699,7 @@ fn test_s390x_binemit() {
             op: CmpOp::CmpS64Ext32,
             rn: gpr(1),
             mem: MemArg::Label {
-                target: MachLabel::from_block(1),
+                target: MachLabel::from_block(BlockIndex::new(1)),
             },
         },
         "C61C00000003",
@@ -1738,7 +1738,7 @@ fn test_s390x_binemit() {
             op: CmpOp::CmpL32,
             rn: gpr(1),
             mem: MemArg::Label {
-                target: MachLabel::from_block(1),
+                target: MachLabel::from_block(BlockIndex::new(1)),
             },
         },
         "C61F00000003",
@@ -1749,7 +1749,7 @@ fn test_s390x_binemit() {
             op: CmpOp::CmpL32Ext16,
             rn: gpr(1),
             mem: MemArg::Label {
-                target: MachLabel::from_block(1),
+                target: MachLabel::from_block(BlockIndex::new(1)),
             },
         },
         "C61700000003",
@@ -1774,7 +1774,7 @@ fn test_s390x_binemit() {
             op: CmpOp::CmpL64,
             rn: gpr(1),
             mem: MemArg::Label {
-                target: MachLabel::from_block(1),
+                target: MachLabel::from_block(BlockIndex::new(1)),
             },
         },
         "C61A00000003",
@@ -1785,7 +1785,7 @@ fn test_s390x_binemit() {
             op: CmpOp::CmpL64Ext16,
             rn: gpr(1),
             mem: MemArg::Label {
-                target: MachLabel::from_block(1),
+                target: MachLabel::from_block(BlockIndex::new(1)),
             },
         },
         "C61600000003",
@@ -1810,7 +1810,7 @@ fn test_s390x_binemit() {
             op: CmpOp::CmpL64Ext32,
             rn: gpr(1),
             mem: MemArg::Label {
-                target: MachLabel::from_block(1),
+                target: MachLabel::from_block(BlockIndex::new(1)),
             },
         },
         "C61E00000003",
@@ -4536,7 +4536,7 @@ fn test_s390x_binemit() {
         Inst::Load32 {
             rd: writable_gpr(1),
             mem: MemArg::Label {
-                target: MachLabel::from_block(1),
+                target: MachLabel::from_block(BlockIndex::new(1)),
             },
         },
         "C41D00000003",
@@ -4546,7 +4546,7 @@ fn test_s390x_binemit() {
         Inst::Load32SExt16 {
             rd: writable_gpr(1),
             mem: MemArg::Label {
-                target: MachLabel::from_block(1),
+                target: MachLabel::from_block(BlockIndex::new(1)),
             },
         },
         "C41500000003",
@@ -4556,7 +4556,7 @@ fn test_s390x_binemit() {
         Inst::Load32ZExt16 {
             rd: writable_gpr(1),
             mem: MemArg::Label {
-                target: MachLabel::from_block(1),
+                target: MachLabel::from_block(BlockIndex::new(1)),
             },
         },
         "C41200000003",
@@ -4566,7 +4566,7 @@ fn test_s390x_binemit() {
         Inst::Load64 {
             rd: writable_gpr(1),
             mem: MemArg::Label {
-                target: MachLabel::from_block(1),
+                target: MachLabel::from_block(BlockIndex::new(1)),
             },
         },
         "C41800000003",
@@ -4576,7 +4576,7 @@ fn test_s390x_binemit() {
         Inst::Load64SExt16 {
             rd: writable_gpr(1),
             mem: MemArg::Label {
-                target: MachLabel::from_block(1),
+                target: MachLabel::from_block(BlockIndex::new(1)),
             },
         },
         "C41400000003",
@@ -4586,7 +4586,7 @@ fn test_s390x_binemit() {
         Inst::Load64ZExt16 {
             rd: writable_gpr(1),
             mem: MemArg::Label {
-                target: MachLabel::from_block(1),
+                target: MachLabel::from_block(BlockIndex::new(1)),
             },
         },
         "C41600000003",
@@ -4596,7 +4596,7 @@ fn test_s390x_binemit() {
         Inst::Load64SExt32 {
             rd: writable_gpr(1),
             mem: MemArg::Label {
-                target: MachLabel::from_block(1),
+                target: MachLabel::from_block(BlockIndex::new(1)),
             },
         },
         "C41C00000003",
@@ -4606,7 +4606,7 @@ fn test_s390x_binemit() {
         Inst::Load64ZExt32 {
             rd: writable_gpr(1),
             mem: MemArg::Label {
-                target: MachLabel::from_block(1),
+                target: MachLabel::from_block(BlockIndex::new(1)),
             },
         },
         "C41E00000003",
@@ -5790,7 +5790,7 @@ fn test_s390x_binemit() {
         Inst::Store16 {
             rd: gpr(1),
             mem: MemArg::Label {
-                target: MachLabel::from_block(1),
+                target: MachLabel::from_block(BlockIndex::new(1)),
             },
         },
         "C41700000003",
@@ -5800,7 +5800,7 @@ fn test_s390x_binemit() {
         Inst::Store32 {
             rd: gpr(1),
             mem: MemArg::Label {
-                target: MachLabel::from_block(1),
+                target: MachLabel::from_block(BlockIndex::new(1)),
             },
         },
         "C41F00000003",
@@ -5810,7 +5810,7 @@ fn test_s390x_binemit() {
         Inst::Store64 {
             rd: gpr(1),
             mem: MemArg::Label {
-                target: MachLabel::from_block(1),
+                target: MachLabel::from_block(BlockIndex::new(1)),
             },
         },
         "C41B00000003",
@@ -6035,7 +6035,7 @@ fn test_s390x_binemit() {
         Inst::LoadAddr {
             rd: writable_gpr(1),
             mem: MemArg::Label {
-                target: MachLabel::from_block(1),
+                target: MachLabel::from_block(BlockIndex::new(1)),
             },
         },
         "C01000000003",
@@ -6499,7 +6499,7 @@ fn test_s390x_binemit() {

     insns.push((
         Inst::Jump {
-            dest: MachLabel::from_block(0),
+            dest: MachLabel::from_block(BlockIndex::new(0)),
         },
         "C0F400000000",
         "jg label0",
@@ -6507,7 +6507,7 @@ fn test_s390x_binemit() {
     insns.push((
         Inst::OneWayCondBr {
-            target: MachLabel::from_block(0),
+            target: MachLabel::from_block(BlockIndex::new(0)),
             cond: Cond::from_mask(1),
         },
         "C01400000000",
@@ -6515,7 +6515,7 @@ fn test_s390x_binemit() {
     ));
     insns.push((
         Inst::OneWayCondBr {
-            target: MachLabel::from_block(0),
+            target: MachLabel::from_block(BlockIndex::new(0)),
             cond: Cond::from_mask(2),
         },
         "C02400000000",
@@ -6523,7 +6523,7 @@ fn test_s390x_binemit() {
     ));
     insns.push((
         Inst::OneWayCondBr {
-            target: MachLabel::from_block(0),
+            target: MachLabel::from_block(BlockIndex::new(0)),
             cond: Cond::from_mask(3),
         },
         "C03400000000",
@@ -6531,7 +6531,7 @@ fn test_s390x_binemit() {
     ));
     insns.push((
         Inst::OneWayCondBr {
-            target: MachLabel::from_block(0),
+            target: MachLabel::from_block(BlockIndex::new(0)),
             cond: Cond::from_mask(4),
         },
         "C04400000000",
@@ -6539,7 +6539,7 @@ fn test_s390x_binemit() {
     ));
     insns.push((
         Inst::OneWayCondBr {
-            target: MachLabel::from_block(0),
+            target: MachLabel::from_block(BlockIndex::new(0)),
             cond: Cond::from_mask(5),
         },
         "C05400000000",
@@ -6547,7 +6547,7 @@ fn test_s390x_binemit() {
     ));
     insns.push((
         Inst::OneWayCondBr {
-            target: MachLabel::from_block(0),
+            target: MachLabel::from_block(BlockIndex::new(0)),
             cond: Cond::from_mask(6),
         },
         "C06400000000",
@@ -6555,7 +6555,7 @@ fn test_s390x_binemit() {
     ));
     insns.push((
         Inst::OneWayCondBr {
-            target: MachLabel::from_block(0),
+            target: MachLabel::from_block(BlockIndex::new(0)),
             cond: Cond::from_mask(7),
         },
         "C07400000000",
@@ -6563,7 +6563,7 @@ fn test_s390x_binemit() {
     ));
     insns.push((
         Inst::OneWayCondBr {
-            target: MachLabel::from_block(0),
+            target: MachLabel::from_block(BlockIndex::new(0)),
             cond: Cond::from_mask(8),
         },
         "C08400000000",
@@ -6571,7 +6571,7 @@ fn test_s390x_binemit() {
     ));
     insns.push((
         Inst::OneWayCondBr {
-            target: MachLabel::from_block(0),
+            target: MachLabel::from_block(BlockIndex::new(0)),
             cond: Cond::from_mask(9),
         },
         "C09400000000",
@@ -6579,7 +6579,7 @@ fn test_s390x_binemit() {
     ));
     insns.push((
         Inst::OneWayCondBr {
-            target: MachLabel::from_block(0),
+            target: MachLabel::from_block(BlockIndex::new(0)),
             cond: Cond::from_mask(10),
         },
         "C0A400000000",
@@ -6587,7 +6587,7 @@ fn test_s390x_binemit() {
     ));
     insns.push((
         Inst::OneWayCondBr {
-            target: MachLabel::from_block(0),
+            target: MachLabel::from_block(BlockIndex::new(0)),
             cond: Cond::from_mask(11),
         },
         "C0B400000000",
@@ -6595,7 +6595,7 @@ fn test_s390x_binemit() {
     ));
     insns.push((
         Inst::OneWayCondBr {
-            target: MachLabel::from_block(0),
+            target: MachLabel::from_block(BlockIndex::new(0)),
             cond: Cond::from_mask(12),
         },
         "C0C400000000",
@@ -6603,7 +6603,7 @@ fn test_s390x_binemit() {
     ));
     insns.push((
         Inst::OneWayCondBr {
-            target: MachLabel::from_block(0),
+            target: MachLabel::from_block(BlockIndex::new(0)),
             cond: Cond::from_mask(13),
         },
         "C0D400000000",
@@ -6611,7 +6611,7 @@ fn test_s390x_binemit() {
     ));
     insns.push((
         Inst::OneWayCondBr {
-            target: MachLabel::from_block(0),
+            target: MachLabel::from_block(BlockIndex::new(0)),
             cond: Cond::from_mask(14),
         },
         "C0E400000000",
@@ -6620,8 +6620,8 @@ fn test_s390x_binemit() {

     insns.push((
         Inst::CondBr {
-            taken: MachLabel::from_block(0),
-            not_taken: MachLabel::from_block(0),
+            taken: MachLabel::from_block(BlockIndex::new(0)),
+            not_taken: MachLabel::from_block(BlockIndex::new(0)),
             cond: Cond::from_mask(1),
         },
         "C01400000000C0F4FFFFFFFD",
@@ -6629,8 +6629,8 @@ fn test_s390x_binemit() {
     ));
     insns.push((
         Inst::CondBr {
-            taken: MachLabel::from_block(0),
-            not_taken: MachLabel::from_block(0),
+            taken: MachLabel::from_block(BlockIndex::new(0)),
+            not_taken: MachLabel::from_block(BlockIndex::new(0)),
             cond: Cond::from_mask(2),
         },
         "C02400000000C0F4FFFFFFFD",
@@ -6638,8 +6638,8 @@ fn test_s390x_binemit() {
     ));
     insns.push((
         Inst::CondBr {
-            taken: MachLabel::from_block(0),
-            not_taken: MachLabel::from_block(0),
+            taken: MachLabel::from_block(BlockIndex::new(0)),
+            not_taken: MachLabel::from_block(BlockIndex::new(0)),
             cond: Cond::from_mask(3),
         },
         "C03400000000C0F4FFFFFFFD",
@@ -6647,8 +6647,8 @@ fn test_s390x_binemit() {
     ));
     insns.push((
         Inst::CondBr {
-            taken: MachLabel::from_block(0),
-            not_taken: MachLabel::from_block(0),
+            taken: MachLabel::from_block(BlockIndex::new(0)),
+            not_taken: MachLabel::from_block(BlockIndex::new(0)),
             cond: Cond::from_mask(4),
         },
         "C04400000000C0F4FFFFFFFD",
@@ -6656,8 +6656,8 @@ fn test_s390x_binemit() {
     ));
     insns.push((
         Inst::CondBr {
-            taken: MachLabel::from_block(0),
-            not_taken: MachLabel::from_block(0),
+            taken: MachLabel::from_block(BlockIndex::new(0)),
+            not_taken: MachLabel::from_block(BlockIndex::new(0)),
             cond: Cond::from_mask(5),
         },
         "C05400000000C0F4FFFFFFFD",
@@ -6665,8 +6665,8 @@ fn test_s390x_binemit() {
     ));
     insns.push((
         Inst::CondBr {
-            taken: MachLabel::from_block(0),
-            not_taken: MachLabel::from_block(0),
+            taken: MachLabel::from_block(BlockIndex::new(0)),
+            not_taken: MachLabel::from_block(BlockIndex::new(0)),
             cond: Cond::from_mask(6),
         },
         "C06400000000C0F4FFFFFFFD",
@@ -6674,8 +6674,8 @@ fn test_s390x_binemit() {
     ));
     insns.push((
         Inst::CondBr {
-            taken: MachLabel::from_block(0),
-            not_taken: MachLabel::from_block(0),
+            taken: MachLabel::from_block(BlockIndex::new(0)),
+            not_taken: MachLabel::from_block(BlockIndex::new(0)),
             cond: Cond::from_mask(7),
         },
         "C07400000000C0F4FFFFFFFD",
@@ -6683,8 +6683,8 @@ fn test_s390x_binemit() {
     ));
     insns.push((
         Inst::CondBr {
-            taken: MachLabel::from_block(0),
-            not_taken: MachLabel::from_block(0),
+            taken: MachLabel::from_block(BlockIndex::new(0)),
+            not_taken: MachLabel::from_block(BlockIndex::new(0)),
             cond: Cond::from_mask(8),
         },
         "C08400000000C0F4FFFFFFFD",
@@ -6692,8 +6692,8 @@ fn test_s390x_binemit() {
     ));
     insns.push((
         Inst::CondBr {
-            taken: MachLabel::from_block(0),
-            not_taken: MachLabel::from_block(0),
+            taken: MachLabel::from_block(BlockIndex::new(0)),
+            not_taken: MachLabel::from_block(BlockIndex::new(0)),
             cond: Cond::from_mask(9),
         },
         "C09400000000C0F4FFFFFFFD",
@@ -6701,8 +6701,8 @@ fn test_s390x_binemit() {
     ));
     insns.push((
         Inst::CondBr {
-            taken: MachLabel::from_block(0),
-            not_taken: MachLabel::from_block(0),
+            taken: MachLabel::from_block(BlockIndex::new(0)),
+            not_taken: MachLabel::from_block(BlockIndex::new(0)),
             cond: Cond::from_mask(10),
         },
         "C0A400000000C0F4FFFFFFFD",
@@ -6710,8 +6710,8 @@ fn test_s390x_binemit() {
     ));
     insns.push((
         Inst::CondBr {
-            taken: MachLabel::from_block(0),
-            not_taken: MachLabel::from_block(0),
+            taken: MachLabel::from_block(BlockIndex::new(0)),
+            not_taken: MachLabel::from_block(BlockIndex::new(0)),
             cond: Cond::from_mask(11),
         },
         "C0B400000000C0F4FFFFFFFD",
@@ -6719,8 +6719,8 @@ fn test_s390x_binemit() {
     ));
     insns.push((
         Inst::CondBr {
-            taken: MachLabel::from_block(0),
-            not_taken: MachLabel::from_block(0),
+            taken: MachLabel::from_block(BlockIndex::new(0)),
+            not_taken: MachLabel::from_block(BlockIndex::new(0)),
             cond: Cond::from_mask(12),
         },
         "C0C400000000C0F4FFFFFFFD",
@@ -6728,8 +6728,8 @@ fn test_s390x_binemit() {
     ));
     insns.push((
         Inst::CondBr {
-            taken: MachLabel::from_block(0),
-            not_taken: MachLabel::from_block(0),
+            taken: MachLabel::from_block(BlockIndex::new(0)),
+            not_taken: MachLabel::from_block(BlockIndex::new(0)),
             cond: Cond::from_mask(13),
         },
         "C0D400000000C0F4FFFFFFFD",
@@ -6737,8 +6737,8 @@ fn test_s390x_binemit() {
     ));
     insns.push((
         Inst::CondBr {
-            taken: MachLabel::from_block(0),
-            not_taken: MachLabel::from_block(0),
+            taken: MachLabel::from_block(BlockIndex::new(0)),
+            not_taken: MachLabel::from_block(BlockIndex::new(0)),
             cond: Cond::from_mask(14),
         },
         "C0E400000000C0F4FFFFFFFD",
@@ -6782,7 +6782,14 @@ fn test_s390x_binemit() {
         "basr %r14, %r1",
     ));

-    insns.push((Inst::Ret { link: gpr(14) }, "07FE", "br %r14"));
+    insns.push((
+        Inst::Ret {
+            link: gpr(14),
+            rets: vec![],
+        },
+        "07FE",
+        "br %r14",
+    ));

     insns.push((Inst::Debugtrap, "0001", "debugtrap"));

@@ -8246,7 +8253,6 @@ fn test_s390x_binemit() {
     isa_flag_builder.enable("arch13").unwrap();
     let isa_flags = s390x_settings::Flags::new(&flags, isa_flag_builder);

-    let rru = create_reg_universe(&flags);
     let emit_info = EmitInfo::new(flags, isa_flags);
     for (insn, expected_encoding, expected_printing) in insns {
         println!(
@@ -8255,7 +8261,8 @@ fn test_s390x_binemit() {
         );

         // Check the printed text is as expected.
-        let actual_printing = insn.show_rru(Some(&rru));
+        let actual_printing =
+            insn.print_with_state(&mut EmitState::default(), &mut AllocationConsumer::new(&[]));
         assert_eq!(expected_printing, actual_printing);

         let mut buffer = MachBuffer::new();
@@ -8265,7 +8272,7 @@ fn test_s390x_binemit() {
         buffer.bind_label(label0);

         // Emit the instruction.
-        insn.emit(&mut buffer, &emit_info, &mut Default::default());
+        insn.emit(&[], &mut buffer, &emit_info, &mut Default::default());

         // Label 1 after the instruction.
         let label1 = buffer.get_label();
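The tests now wrap raw block numbers as `MachLabel::from_block(BlockIndex::new(n))`. A sketch of why such a newtype index is worth the ceremony; the `BlockIndex` below is a hypothetical re-implementation for illustration, not cranelift's actual type:

/// Illustrative newtype index: a typed wrapper so a block number cannot be
/// confused with, or silently converted from, a plain u32.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
struct BlockIndex(u32);

impl BlockIndex {
    fn new(i: u32) -> Self { BlockIndex(i) }
    fn index(self) -> u32 { self.0 }
}

fn main() {
    let b = BlockIndex::new(1);
    assert_eq!(b.index(), 1);
    // let b2: BlockIndex = 1; // would not compile: no implicit conversion
    println!("{:?}", b);
}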
diff --git a/cranelift/codegen/src/isa/s390x/inst/imms.rs b/cranelift/codegen/src/isa/s390x/inst/imms.rs
index b1a459ea68..c3922c337b 100644
--- a/cranelift/codegen/src/isa/s390x/inst/imms.rs
+++ b/cranelift/codegen/src/isa/s390x/inst/imms.rs
@@ -1,6 +1,6 @@
 //! S390x ISA definitions: immediate constants.

-use regalloc::{PrettyPrint, RealRegUniverse};
+use crate::machinst::{AllocationConsumer, PrettyPrint};
 use std::string::String;

 /// An unsigned 12-bit immediate.
@@ -207,25 +207,25 @@ impl UImm32Shifted {
 }

 impl PrettyPrint for UImm12 {
-    fn show_rru(&self, _mb_rru: Option<&RealRegUniverse>) -> String {
+    fn pretty_print(&self, _: u8, _: &mut AllocationConsumer<'_>) -> String {
         format!("{}", self.value)
     }
 }

 impl PrettyPrint for SImm20 {
-    fn show_rru(&self, _mb_rru: Option<&RealRegUniverse>) -> String {
+    fn pretty_print(&self, _: u8, _: &mut AllocationConsumer<'_>) -> String {
         format!("{}", self.value)
     }
 }

 impl PrettyPrint for UImm16Shifted {
-    fn show_rru(&self, _mb_rru: Option<&RealRegUniverse>) -> String {
+    fn pretty_print(&self, _: u8, _: &mut AllocationConsumer<'_>) -> String {
         format!("{}", self.bits)
     }
 }

 impl PrettyPrint for UImm32Shifted {
-    fn show_rru(&self, _mb_rru: Option<&RealRegUniverse>) -> String {
+    fn pretty_print(&self, _: u8, _: &mut AllocationConsumer<'_>) -> String {
         format!("{}", self.bits)
     }
 }
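The `PrettyPrint` impls above switch from a register-universe parameter to an operand-size byte plus an allocation cursor. A compilable sketch of that trait shape, with `pretty_print_default` as the convenience for operand-free values; all types here are minimal stand-ins, not the real cranelift definitions:

/// Illustrative stand-in for the allocation cursor.
struct AllocationConsumer<'a> { allocs: &'a [u8], next: usize }

trait PrettyPrint {
    fn pretty_print(&self, size: u8, allocs: &mut AllocationConsumer<'_>) -> String;

    /// Convenience for values that reference no operands.
    fn pretty_print_default(&self) -> String {
        self.pretty_print(8, &mut AllocationConsumer { allocs: &[], next: 0 })
    }
}

struct SImm20 { value: i32 }
impl PrettyPrint for SImm20 {
    fn pretty_print(&self, _: u8, _: &mut AllocationConsumer<'_>) -> String {
        format!("{}", self.value)
    }
}

fn main() {
    assert_eq!(SImm20 { value: -42 }.pretty_print_default(), "-42");
}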
diff --git a/cranelift/codegen/src/isa/s390x/inst/mod.rs b/cranelift/codegen/src/isa/s390x/inst/mod.rs
index adff4a4d95..c5f7f72f43 100644
--- a/cranelift/codegen/src/isa/s390x/inst/mod.rs
+++ b/cranelift/codegen/src/isa/s390x/inst/mod.rs
@@ -4,19 +4,15 @@
 #![allow(dead_code)]

 use crate::binemit::{Addend, CodeOffset, Reloc};
-use crate::ir::{types, ExternalName, Opcode, Type, ValueLabel};
+use crate::ir::{types, ExternalName, Opcode, Type};
 use crate::machinst::*;
 use crate::{settings, CodegenError, CodegenResult};
-
-use regalloc::{PrettyPrint, RegUsageCollector, RegUsageMapper};
-use regalloc::{RealRegUniverse, Reg, RegClass, SpillSlot, VirtualReg, Writable};
-
 use alloc::boxed::Box;
 use alloc::vec::Vec;
 use core::convert::TryFrom;
+use regalloc2::VReg;
 use smallvec::{smallvec, SmallVec};
 use std::string::{String, ToString};
-
 pub mod regs;
 pub use self::regs::*;
 pub mod imms;
@@ -196,7 +192,6 @@ impl Inst {
             | Inst::Loop { .. }
             | Inst::CondBreak { .. }
             | Inst::VirtualSPOffsetAdj { .. }
-            | Inst::ValueLabelMarker { .. }
             | Inst::Unwind { .. } => InstructionSet::Base,

             // These depend on the opcode
@@ -216,13 +211,15 @@ impl Inst {
             | Inst::FpuStoreRev32 { .. }
             | Inst::FpuLoadRev64 { .. }
             | Inst::FpuStoreRev64 { .. } => InstructionSet::VXRS_EXT2,
+
+            Inst::DummyUse { .. } => InstructionSet::Base,
         }
     }

     /// Create a 64-bit move instruction.
     pub fn mov64(to_reg: Writable<Reg>, from_reg: Reg) -> Inst {
-        assert!(to_reg.to_reg().get_class() == from_reg.get_class());
-        if from_reg.get_class() == RegClass::I64 {
+        assert!(to_reg.to_reg().class() == from_reg.class());
+        if from_reg.class() == RegClass::Int {
             Inst::Mov64 {
                 rd: to_reg,
                 rm: from_reg,
@@ -237,7 +234,7 @@ impl Inst {

     /// Create a 32-bit move instruction.
     pub fn mov32(to_reg: Writable<Reg>, from_reg: Reg) -> Inst {
-        if from_reg.get_class() == RegClass::I64 {
+        if from_reg.class() == RegClass::Int {
             Inst::Mov32 {
                 rd: to_reg,
                 rm: from_reg,
@@ -352,140 +349,132 @@ impl Inst {
 //=============================================================================
 // Instructions: get_regs

-fn memarg_regs(memarg: &MemArg, collector: &mut RegUsageCollector) {
+fn memarg_operands<F: Fn(VReg) -> VReg>(memarg: &MemArg, collector: &mut OperandCollector<'_, F>) {
     match memarg {
         &MemArg::BXD12 { base, index, .. } | &MemArg::BXD20 { base, index, .. } => {
-            if base != zero_reg() {
-                collector.add_use(base);
-            }
-            if index != zero_reg() {
-                collector.add_use(index);
-            }
+            collector.reg_use(base);
+            collector.reg_use(index);
         }
         &MemArg::Label { .. } | &MemArg::Symbol { .. } => {}
         &MemArg::RegOffset { reg, .. } => {
-            collector.add_use(reg);
-        }
-        &MemArg::InitialSPOffset { .. } | &MemArg::NominalSPOffset { .. } => {
-            collector.add_use(stack_reg());
+            collector.reg_use(reg);
         }
+        &MemArg::InitialSPOffset { .. } | &MemArg::NominalSPOffset { .. } => {}
     }
 }

-fn s390x_get_regs(inst: &Inst, collector: &mut RegUsageCollector) {
+fn s390x_get_operands<F: Fn(VReg) -> VReg>(inst: &Inst, collector: &mut OperandCollector<'_, F>) {
     match inst {
         &Inst::AluRRR { rd, rn, rm, .. } => {
-            collector.add_def(rd);
-            collector.add_use(rn);
-            collector.add_use(rm);
+            collector.reg_def(rd);
+            collector.reg_use(rn);
+            collector.reg_use(rm);
         }
         &Inst::AluRRSImm16 { rd, rn, .. } => {
-            collector.add_def(rd);
-            collector.add_use(rn);
+            collector.reg_def(rd);
+            collector.reg_use(rn);
         }
         &Inst::AluRR { rd, rm, .. } => {
-            collector.add_mod(rd);
-            collector.add_use(rm);
+            collector.reg_mod(rd);
+            collector.reg_use(rm);
         }
         &Inst::AluRX { rd, ref mem, .. } => {
-            collector.add_mod(rd);
-            memarg_regs(mem, collector);
+            collector.reg_mod(rd);
+            memarg_operands(mem, collector);
         }
         &Inst::AluRSImm16 { rd, .. } => {
-            collector.add_mod(rd);
+            collector.reg_mod(rd);
         }
         &Inst::AluRSImm32 { rd, .. } => {
-            collector.add_mod(rd);
+            collector.reg_mod(rd);
         }
         &Inst::AluRUImm32 { rd, .. } => {
-            collector.add_mod(rd);
+            collector.reg_mod(rd);
        }
         &Inst::AluRUImm16Shifted { rd, .. } => {
-            collector.add_mod(rd);
+            collector.reg_mod(rd);
         }
         &Inst::AluRUImm32Shifted { rd, .. } => {
-            collector.add_mod(rd);
+            collector.reg_mod(rd);
         }
         &Inst::SMulWide { rn, rm, .. } => {
-            collector.add_def(writable_gpr(0));
-            collector.add_def(writable_gpr(1));
-            collector.add_use(rn);
-            collector.add_use(rm);
+            collector.reg_use(rn);
+            collector.reg_use(rm);
+            collector.reg_def(writable_gpr(0));
+            collector.reg_def(writable_gpr(1));
         }
         &Inst::UMulWide { rn, .. } => {
-            collector.add_def(writable_gpr(0));
-            collector.add_mod(writable_gpr(1));
-            collector.add_use(rn);
+            collector.reg_use(rn);
+            collector.reg_def(writable_gpr(0));
+            collector.reg_mod(writable_gpr(1));
         }
         &Inst::SDivMod32 { rn, .. } | &Inst::SDivMod64 { rn, .. } => {
-            collector.add_def(writable_gpr(0));
-            collector.add_mod(writable_gpr(1));
-            collector.add_use(rn);
+            collector.reg_use(rn);
+            collector.reg_def(writable_gpr(0));
+            collector.reg_mod(writable_gpr(1));
         }
         &Inst::UDivMod32 { rn, .. } | &Inst::UDivMod64 { rn, .. } => {
-            collector.add_mod(writable_gpr(0));
-            collector.add_mod(writable_gpr(1));
-            collector.add_use(rn);
+            collector.reg_use(rn);
+            collector.reg_mod(writable_gpr(0));
+            collector.reg_mod(writable_gpr(1));
        }
         &Inst::Flogr { rn, .. } => {
-            collector.add_def(writable_gpr(0));
-            collector.add_def(writable_gpr(1));
-            collector.add_use(rn);
+            collector.reg_use(rn);
+            collector.reg_def(writable_gpr(0));
+            collector.reg_def(writable_gpr(1));
         }
         &Inst::ShiftRR {
             rd, rn, shift_reg, ..
         } => {
-            collector.add_def(rd);
-            collector.add_use(rn);
-            if shift_reg != zero_reg() {
-                collector.add_use(shift_reg);
-            }
+            collector.reg_def(rd);
+            collector.reg_use(rn);
+            collector.reg_use(shift_reg);
         }
         &Inst::RxSBG { rd, rn, .. } => {
-            collector.add_mod(rd);
-            collector.add_use(rn);
+            collector.reg_mod(rd);
+            collector.reg_use(rn);
         }
         &Inst::RxSBGTest { rd, rn, .. } => {
-            collector.add_use(rd);
-            collector.add_use(rn);
+            collector.reg_use(rd);
+            collector.reg_use(rn);
         }
         &Inst::UnaryRR { rd, rn, .. } => {
-            collector.add_def(rd);
-            collector.add_use(rn);
+            collector.reg_def(rd);
+            collector.reg_use(rn);
         }
         &Inst::CmpRR { rn, rm, .. } => {
-            collector.add_use(rn);
-            collector.add_use(rm);
+            collector.reg_use(rn);
+            collector.reg_use(rm);
         }
         &Inst::CmpRX { rn, ref mem, .. } => {
-            collector.add_use(rn);
-            memarg_regs(mem, collector);
+            collector.reg_use(rn);
+            memarg_operands(mem, collector);
         }
         &Inst::CmpRSImm16 { rn, .. } => {
-            collector.add_use(rn);
+            collector.reg_use(rn);
         }
         &Inst::CmpRSImm32 { rn, .. } => {
-            collector.add_use(rn);
+            collector.reg_use(rn);
         }
         &Inst::CmpRUImm32 { rn, .. } => {
-            collector.add_use(rn);
+            collector.reg_use(rn);
         }
         &Inst::CmpTrapRR { rn, rm, .. } => {
-            collector.add_use(rn);
-            collector.add_use(rm);
+            collector.reg_use(rn);
+            collector.reg_use(rm);
         }
         &Inst::CmpTrapRSImm16 { rn, .. } => {
-            collector.add_use(rn);
+            collector.reg_use(rn);
         }
         &Inst::CmpTrapRUImm16 { rn, .. } => {
-            collector.add_use(rn);
+            collector.reg_use(rn);
         }
         &Inst::AtomicRmw {
             rd, rn, ref mem, ..
         } => {
-            collector.add_def(rd);
-            collector.add_use(rn);
-            memarg_regs(mem, collector);
+            collector.reg_def(rd);
+            collector.reg_use(rn);
+            memarg_operands(mem, collector);
         }
         &Inst::AtomicCas32 {
             rd, rn, ref mem, ..
@@ -493,9 +482,9 @@ fn s390x_get_regs(inst: &Inst, collector: &mut RegUsageCollector) {
         | &Inst::AtomicCas64 {
             rd, rn, ref mem, ..
         } => {
-            collector.add_mod(rd);
-            collector.add_use(rn);
-            memarg_regs(mem, collector);
+            collector.reg_mod(rd);
+            collector.reg_use(rn);
+            memarg_operands(mem, collector);
         }
         &Inst::Fence => {}
         &Inst::Load32 { rd, ref mem, .. }
@@ -513,8 +502,8 @@ fn s390x_get_regs(inst: &Inst, collector: &mut RegUsageCollector) {
         | &Inst::LoadRev16 { rd, ref mem, .. }
         | &Inst::LoadRev32 { rd, ref mem, .. }
         | &Inst::LoadRev64 { rd, ref mem, .. } => {
-            collector.add_def(rd);
-            memarg_regs(mem, collector);
+            collector.reg_def(rd);
+            memarg_operands(mem, collector);
         }
         &Inst::Store8 { rd, ref mem, .. }
         | &Inst::Store16 { rd, ref mem, .. }
@@ -523,42 +512,42 @@ fn s390x_get_regs(inst: &Inst, collector: &mut RegUsageCollector) {
         | &Inst::StoreRev16 { rd, ref mem, .. }
         | &Inst::StoreRev32 { rd, ref mem, .. }
         | &Inst::StoreRev64 { rd, ref mem, .. } => {
-            collector.add_use(rd);
-            memarg_regs(mem, collector);
+            collector.reg_use(rd);
+            memarg_operands(mem, collector);
         }
         &Inst::StoreImm8 { ref mem, .. }
         | &Inst::StoreImm16 { ref mem, .. }
         | &Inst::StoreImm32SExt16 { ref mem, .. }
         | &Inst::StoreImm64SExt16 { ref mem, .. } => {
-            memarg_regs(mem, collector);
+            memarg_operands(mem, collector);
         }
         &Inst::LoadMultiple64 {
             rt, rt2, ref mem, ..
         } => {
-            let first_regnum = rt.to_reg().get_hw_encoding();
-            let last_regnum = rt2.to_reg().get_hw_encoding();
+            memarg_operands(mem, collector);
+            let first_regnum = rt.to_reg().to_real_reg().unwrap().hw_enc();
+            let last_regnum = rt2.to_reg().to_real_reg().unwrap().hw_enc();
             for regnum in first_regnum..last_regnum + 1 {
-                collector.add_def(writable_gpr(regnum));
+                collector.reg_def(writable_gpr(regnum));
             }
-            memarg_regs(mem, collector);
         }
         &Inst::StoreMultiple64 {
             rt, rt2, ref mem, ..
         } => {
-            let first_regnum = rt.get_hw_encoding();
-            let last_regnum = rt2.get_hw_encoding();
+            memarg_operands(mem, collector);
+            let first_regnum = rt.to_real_reg().unwrap().hw_enc();
+            let last_regnum = rt2.to_real_reg().unwrap().hw_enc();
             for regnum in first_regnum..last_regnum + 1 {
-                collector.add_use(gpr(regnum));
+                collector.reg_use(gpr(regnum));
             }
-            memarg_regs(mem, collector);
         }
         &Inst::Mov64 { rd, rm } => {
-            collector.add_def(rd);
-            collector.add_use(rm);
+            collector.reg_def(rd);
+            collector.reg_use(rm);
         }
         &Inst::Mov32 { rd, rm } => {
-            collector.add_def(rd);
-            collector.add_use(rm);
+            collector.reg_def(rd);
+            collector.reg_use(rm);
         }
         &Inst::Mov32Imm { rd, .. }
         | &Inst::Mov32SImm16 { rd, .. }
@@ -566,123 +555,126 @@ fn s390x_get_regs(inst: &Inst, collector: &mut RegUsageCollector) {
         | &Inst::Mov64SImm32 { rd, .. }
         | &Inst::Mov64UImm16Shifted { rd, .. }
         | &Inst::Mov64UImm32Shifted { rd, .. } => {
-            collector.add_def(rd);
+            collector.reg_def(rd);
         }
         &Inst::CMov32 { rd, rm, .. } | &Inst::CMov64 { rd, rm, .. } => {
-            collector.add_mod(rd);
-            collector.add_use(rm);
+            collector.reg_mod(rd);
+            collector.reg_use(rm);
         }
         &Inst::CMov32SImm16 { rd, .. } | &Inst::CMov64SImm16 { rd, .. } => {
-            collector.add_mod(rd);
+            collector.reg_mod(rd);
         }
         &Inst::Insert64UImm16Shifted { rd, .. } | &Inst::Insert64UImm32Shifted { rd, .. } => {
-            collector.add_mod(rd);
+            collector.reg_mod(rd);
         }
         &Inst::FpuMove32 { rd, rn } | &Inst::FpuMove64 { rd, rn } => {
-            collector.add_def(rd);
-            collector.add_use(rn);
+            collector.reg_def(rd);
+            collector.reg_use(rn);
         }
         &Inst::FpuCMov32 { rd, rm, .. } | &Inst::FpuCMov64 { rd, rm, .. } => {
-            collector.add_mod(rd);
-            collector.add_use(rm);
+            collector.reg_mod(rd);
+            collector.reg_use(rm);
         }
         &Inst::MovToFpr { rd, rn } | &Inst::MovFromFpr { rd, rn } => {
-            collector.add_def(rd);
-            collector.add_use(rn);
+            collector.reg_def(rd);
+            collector.reg_use(rn);
        }
         &Inst::FpuRR { rd, rn, .. } => {
-            collector.add_def(rd);
-            collector.add_use(rn);
+            collector.reg_def(rd);
+            collector.reg_use(rn);
         }
         &Inst::FpuRRR { rd, rm, .. } => {
-            collector.add_mod(rd);
-            collector.add_use(rm);
+            collector.reg_mod(rd);
+            collector.reg_use(rm);
         }
         &Inst::FpuRRRR { rd, rn, rm, .. } => {
-            collector.add_mod(rd);
-            collector.add_use(rn);
-            collector.add_use(rm);
+            collector.reg_mod(rd);
+            collector.reg_use(rn);
+            collector.reg_use(rm);
         }
         &Inst::FpuCopysign { rd, rn, rm, .. } => {
-            collector.add_def(rd);
-            collector.add_use(rn);
-            collector.add_use(rm);
+            collector.reg_def(rd);
+            collector.reg_use(rn);
+            collector.reg_use(rm);
         }
         &Inst::FpuCmp32 { rn, rm } | &Inst::FpuCmp64 { rn, rm } => {
-            collector.add_use(rn);
-            collector.add_use(rm);
+            collector.reg_use(rn);
+            collector.reg_use(rm);
         }
         &Inst::FpuLoad32 { rd, ref mem, .. } => {
-            collector.add_def(rd);
-            memarg_regs(mem, collector);
+            collector.reg_def(rd);
+            memarg_operands(mem, collector);
         }
         &Inst::FpuLoad64 { rd, ref mem, .. } => {
-            collector.add_def(rd);
-            memarg_regs(mem, collector);
+            collector.reg_def(rd);
+            memarg_operands(mem, collector);
         }
         &Inst::FpuStore32 { rd, ref mem, .. } => {
-            collector.add_use(rd);
-            memarg_regs(mem, collector);
+            collector.reg_use(rd);
+            memarg_operands(mem, collector);
         }
         &Inst::FpuStore64 { rd, ref mem, .. } => {
-            collector.add_use(rd);
-            memarg_regs(mem, collector);
+            collector.reg_use(rd);
+            memarg_operands(mem, collector);
         }
         &Inst::FpuLoadRev32 { rd, ref mem, .. } => {
-            collector.add_def(rd);
-            memarg_regs(mem, collector);
+            collector.reg_def(rd);
+            memarg_operands(mem, collector);
         }
         &Inst::FpuLoadRev64 { rd, ref mem, .. } => {
-            collector.add_def(rd);
-            memarg_regs(mem, collector);
+            collector.reg_def(rd);
+            memarg_operands(mem, collector);
         }
         &Inst::FpuStoreRev32 { rd, ref mem, .. } => {
-            collector.add_use(rd);
-            memarg_regs(mem, collector);
+            collector.reg_use(rd);
+            memarg_operands(mem, collector);
         }
         &Inst::FpuStoreRev64 { rd, ref mem, .. } => {
-            collector.add_use(rd);
-            memarg_regs(mem, collector);
+            collector.reg_use(rd);
+            memarg_operands(mem, collector);
         }
         &Inst::LoadFpuConst32 { rd, .. } | &Inst::LoadFpuConst64 { rd, .. } => {
-            collector.add_def(rd);
+            collector.reg_def(rd);
         }
         &Inst::FpuToInt { rd, rn, .. } => {
-            collector.add_def(rd);
-            collector.add_use(rn);
+            collector.reg_def(rd);
+            collector.reg_use(rn);
         }
         &Inst::IntToFpu { rd, rn, .. } => {
-            collector.add_def(rd);
-            collector.add_use(rn);
+            collector.reg_def(rd);
+            collector.reg_use(rn);
         }
         &Inst::FpuRound { rd, rn, .. } => {
-            collector.add_def(rd);
-            collector.add_use(rn);
+            collector.reg_def(rd);
+            collector.reg_use(rn);
         }
         &Inst::FpuVecRRR { rd, rn, rm, .. } => {
-            collector.add_def(rd);
-            collector.add_use(rn);
-            collector.add_use(rm);
+            collector.reg_def(rd);
+            collector.reg_use(rn);
+            collector.reg_use(rm);
         }
         &Inst::Extend { rd, rn, .. } => {
-            collector.add_def(rd);
-            collector.add_use(rn);
+            collector.reg_def(rd);
+            collector.reg_use(rn);
         }
         &Inst::Call { link, ref info } => {
-            collector.add_def(link);
-            collector.add_uses(&*info.uses);
-            collector.add_defs(&*info.defs);
+            collector.reg_def(link);
+            collector.reg_uses(&*info.uses);
+            collector.reg_defs(&*info.defs);
         }
         &Inst::CallInd { link, ref info } => {
-            collector.add_def(link);
-            collector.add_uses(&*info.uses);
-            collector.add_defs(&*info.defs);
-            collector.add_use(info.rn);
+            collector.reg_def(link);
+            collector.reg_use(info.rn);
+            collector.reg_uses(&*info.uses);
+            collector.reg_defs(&*info.defs);
+        }
+        &Inst::Ret { link, ref rets } => {
+            collector.reg_use(link);
+            collector.reg_uses(&rets[..]);
         }
-        &Inst::Ret { .. } => {}
         &Inst::Jump { .. } | &Inst::EpiloguePlaceholder => {}
         &Inst::IndirectBr { rn, .. } => {
-            collector.add_use(rn);
+            collector.reg_use(rn);
         }
         &Inst::CondBr { .. } | &Inst::OneWayCondBr { .. } => {}
         &Inst::Nop0 | Inst::Nop2 => {}
} => { - collector.add_def(rd); - collector.add_use(rn); + collector.reg_def(rd); + collector.reg_use(rn); } &Inst::IntToFpu { rd, rn, .. } => { - collector.add_def(rd); - collector.add_use(rn); + collector.reg_def(rd); + collector.reg_use(rn); } &Inst::FpuRound { rd, rn, .. } => { - collector.add_def(rd); - collector.add_use(rn); + collector.reg_def(rd); + collector.reg_use(rn); } &Inst::FpuVecRRR { rd, rn, rm, .. } => { - collector.add_def(rd); - collector.add_use(rn); - collector.add_use(rm); + collector.reg_def(rd); + collector.reg_use(rn); + collector.reg_use(rm); } &Inst::Extend { rd, rn, .. } => { - collector.add_def(rd); - collector.add_use(rn); + collector.reg_def(rd); + collector.reg_use(rn); } &Inst::Call { link, ref info } => { - collector.add_def(link); - collector.add_uses(&*info.uses); - collector.add_defs(&*info.defs); + collector.reg_def(link); + collector.reg_uses(&*info.uses); + collector.reg_defs(&*info.defs); } &Inst::CallInd { link, ref info } => { - collector.add_def(link); - collector.add_uses(&*info.uses); - collector.add_defs(&*info.defs); - collector.add_use(info.rn); + collector.reg_def(link); + collector.reg_use(info.rn); + collector.reg_uses(&*info.uses); + collector.reg_defs(&*info.defs); + } + &Inst::Ret { link, ref rets } => { + collector.reg_use(link); + collector.reg_uses(&rets[..]); } - &Inst::Ret { .. } => {} &Inst::Jump { .. } | &Inst::EpiloguePlaceholder => {} &Inst::IndirectBr { rn, .. } => { - collector.add_use(rn); + collector.reg_use(rn); } &Inst::CondBr { .. } | &Inst::OneWayCondBr { .. } => {} &Inst::Nop0 | &Inst::Nop2 => {} @@ -690,769 +682,26 @@ fn s390x_get_regs(inst: &Inst, collector: &mut RegUsageCollector) { &Inst::Trap { .. } => {} &Inst::TrapIf { .. } => {} &Inst::JTSequence { ridx, .. } => { - collector.add_use(ridx); + collector.reg_use(ridx); } &Inst::LoadExtNameFar { rd, .. } => { - collector.add_def(rd); + collector.reg_def(rd); } &Inst::LoadAddr { rd, ref mem } => { - collector.add_def(rd); - memarg_regs(mem, collector); + collector.reg_def(rd); + memarg_operands(mem, collector); } &Inst::Loop { ref body, .. } => { for inst in body.iter() { - s390x_get_regs(inst, collector); + s390x_get_operands(inst, collector); } } &Inst::CondBreak { .. } => {} &Inst::VirtualSPOffsetAdj { .. } => {} - &Inst::ValueLabelMarker { reg, .. } => { - collector.add_use(reg); - } &Inst::Unwind { .. } => {} - } -} - -//============================================================================= -// Instructions: map_regs - -pub fn s390x_map_regs<RM: RegMapper>(inst: &mut Inst, mapper: &RM) { - fn map_mem<RM: RegMapper>(m: &RM, mem: &mut MemArg) { - match mem { - &mut MemArg::BXD12 { - ref mut base, - ref mut index, - .. - } - | &mut MemArg::BXD20 { - ref mut base, - ref mut index, - .. - } => { - if *base != zero_reg() { - m.map_use(base); - } - if *index != zero_reg() { - m.map_use(index); - } - } - &mut MemArg::Label { .. } | &mut MemArg::Symbol { .. } => {} - &mut MemArg::RegOffset { ref mut reg, .. } => m.map_use(reg), - &mut MemArg::InitialSPOffset { .. } | &mut MemArg::NominalSPOffset { .. } => {} - }; - } - - match inst { - &mut Inst::AluRRR { - ref mut rd, - ref mut rn, - ref mut rm, - .. - } => { - mapper.map_def(rd); - mapper.map_use(rn); - mapper.map_use(rm); + &Inst::DummyUse { reg } => { + collector.reg_use(reg); } - &mut Inst::AluRRSImm16 { - ref mut rd, - ref mut rn, - .. - } => { - mapper.map_def(rd); - mapper.map_use(rn); - } - &mut Inst::AluRX { - ref mut rd, - ref mut mem, - ..
- } => { - mapper.map_mod(rd); - map_mem(mapper, mem); - } - &mut Inst::AluRR { - ref mut rd, - ref mut rm, - .. - } => { - mapper.map_mod(rd); - mapper.map_use(rm); - } - &mut Inst::AluRSImm16 { ref mut rd, .. } => { - mapper.map_mod(rd); - } - &mut Inst::AluRSImm32 { ref mut rd, .. } => { - mapper.map_mod(rd); - } - &mut Inst::AluRUImm32 { ref mut rd, .. } => { - mapper.map_mod(rd); - } - &mut Inst::AluRUImm16Shifted { ref mut rd, .. } => { - mapper.map_mod(rd); - } - &mut Inst::AluRUImm32Shifted { ref mut rd, .. } => { - mapper.map_mod(rd); - } - &mut Inst::SMulWide { - ref mut rn, - ref mut rm, - .. - } => { - mapper.map_use(rn); - mapper.map_use(rm); - } - &mut Inst::UMulWide { ref mut rn, .. } => { - mapper.map_use(rn); - } - &mut Inst::SDivMod32 { ref mut rn, .. } => { - mapper.map_use(rn); - } - &mut Inst::SDivMod64 { ref mut rn, .. } => { - mapper.map_use(rn); - } - &mut Inst::UDivMod32 { ref mut rn, .. } => { - mapper.map_use(rn); - } - &mut Inst::UDivMod64 { ref mut rn, .. } => { - mapper.map_use(rn); - } - &mut Inst::Flogr { ref mut rn, .. } => { - mapper.map_use(rn); - } - &mut Inst::ShiftRR { - ref mut rd, - ref mut rn, - ref mut shift_reg, - .. - } => { - mapper.map_def(rd); - mapper.map_use(rn); - if *shift_reg != zero_reg() { - mapper.map_use(shift_reg); - } - } - &mut Inst::RxSBG { - ref mut rd, - ref mut rn, - .. - } => { - mapper.map_mod(rd); - mapper.map_use(rn); - } - &mut Inst::RxSBGTest { - ref mut rd, - ref mut rn, - .. - } => { - mapper.map_use(rd); - mapper.map_use(rn); - } - &mut Inst::UnaryRR { - ref mut rd, - ref mut rn, - .. - } => { - mapper.map_def(rd); - mapper.map_use(rn); - } - &mut Inst::CmpRR { - ref mut rn, - ref mut rm, - .. - } => { - mapper.map_use(rn); - mapper.map_use(rm); - } - &mut Inst::CmpRX { - ref mut rn, - ref mut mem, - .. - } => { - mapper.map_use(rn); - map_mem(mapper, mem); - } - &mut Inst::CmpRSImm16 { ref mut rn, .. } => { - mapper.map_use(rn); - } - &mut Inst::CmpRSImm32 { ref mut rn, .. } => { - mapper.map_use(rn); - } - &mut Inst::CmpRUImm32 { ref mut rn, .. } => { - mapper.map_use(rn); - } - &mut Inst::CmpTrapRR { - ref mut rn, - ref mut rm, - .. - } => { - mapper.map_use(rn); - mapper.map_use(rm); - } - &mut Inst::CmpTrapRSImm16 { ref mut rn, .. } => { - mapper.map_use(rn); - } - &mut Inst::CmpTrapRUImm16 { ref mut rn, .. } => { - mapper.map_use(rn); - } - - &mut Inst::AtomicRmw { - ref mut rd, - ref mut rn, - ref mut mem, - .. - } => { - mapper.map_def(rd); - mapper.map_use(rn); - map_mem(mapper, mem); - } - &mut Inst::AtomicCas32 { - ref mut rd, - ref mut rn, - ref mut mem, - .. - } => { - mapper.map_mod(rd); - mapper.map_use(rn); - map_mem(mapper, mem); - } - &mut Inst::AtomicCas64 { - ref mut rd, - ref mut rn, - ref mut mem, - .. - } => { - mapper.map_mod(rd); - mapper.map_use(rn); - map_mem(mapper, mem); - } - &mut Inst::Fence => {} - - &mut Inst::Load32 { - ref mut rd, - ref mut mem, - .. - } => { - mapper.map_def(rd); - map_mem(mapper, mem); - } - &mut Inst::Load32ZExt8 { - ref mut rd, - ref mut mem, - .. - } => { - mapper.map_def(rd); - map_mem(mapper, mem); - } - &mut Inst::Load32SExt8 { - ref mut rd, - ref mut mem, - .. - } => { - mapper.map_def(rd); - map_mem(mapper, mem); - } - &mut Inst::Load32ZExt16 { - ref mut rd, - ref mut mem, - .. - } => { - mapper.map_def(rd); - map_mem(mapper, mem); - } - &mut Inst::Load32SExt16 { - ref mut rd, - ref mut mem, - .. - } => { - mapper.map_def(rd); - map_mem(mapper, mem); - } - &mut Inst::Load64 { - ref mut rd, - ref mut mem, - .. 
- } => { - mapper.map_def(rd); - map_mem(mapper, mem); - } - &mut Inst::Load64ZExt8 { - ref mut rd, - ref mut mem, - .. - } => { - mapper.map_def(rd); - map_mem(mapper, mem); - } - &mut Inst::Load64SExt8 { - ref mut rd, - ref mut mem, - .. - } => { - mapper.map_def(rd); - map_mem(mapper, mem); - } - &mut Inst::Load64ZExt16 { - ref mut rd, - ref mut mem, - .. - } => { - mapper.map_def(rd); - map_mem(mapper, mem); - } - &mut Inst::Load64SExt16 { - ref mut rd, - ref mut mem, - .. - } => { - mapper.map_def(rd); - map_mem(mapper, mem); - } - &mut Inst::Load64ZExt32 { - ref mut rd, - ref mut mem, - .. - } => { - mapper.map_def(rd); - map_mem(mapper, mem); - } - &mut Inst::Load64SExt32 { - ref mut rd, - ref mut mem, - .. - } => { - mapper.map_def(rd); - map_mem(mapper, mem); - } - &mut Inst::LoadRev16 { - ref mut rd, - ref mut mem, - .. - } => { - mapper.map_def(rd); - map_mem(mapper, mem); - } - &mut Inst::LoadRev32 { - ref mut rd, - ref mut mem, - .. - } => { - mapper.map_def(rd); - map_mem(mapper, mem); - } - &mut Inst::LoadRev64 { - ref mut rd, - ref mut mem, - .. - } => { - mapper.map_def(rd); - map_mem(mapper, mem); - } - - &mut Inst::Store8 { - ref mut rd, - ref mut mem, - .. - } => { - mapper.map_use(rd); - map_mem(mapper, mem); - } - &mut Inst::Store16 { - ref mut rd, - ref mut mem, - .. - } => { - mapper.map_use(rd); - map_mem(mapper, mem); - } - &mut Inst::Store32 { - ref mut rd, - ref mut mem, - .. - } => { - mapper.map_use(rd); - map_mem(mapper, mem); - } - &mut Inst::Store64 { - ref mut rd, - ref mut mem, - .. - } => { - mapper.map_use(rd); - map_mem(mapper, mem); - } - &mut Inst::StoreImm8 { ref mut mem, .. } => { - map_mem(mapper, mem); - } - &mut Inst::StoreImm16 { ref mut mem, .. } => { - map_mem(mapper, mem); - } - &mut Inst::StoreImm32SExt16 { ref mut mem, .. } => { - map_mem(mapper, mem); - } - &mut Inst::StoreImm64SExt16 { ref mut mem, .. } => { - map_mem(mapper, mem); - } - &mut Inst::StoreRev16 { - ref mut rd, - ref mut mem, - .. - } => { - mapper.map_use(rd); - map_mem(mapper, mem); - } - &mut Inst::StoreRev32 { - ref mut rd, - ref mut mem, - .. - } => { - mapper.map_use(rd); - map_mem(mapper, mem); - } - &mut Inst::StoreRev64 { - ref mut rd, - ref mut mem, - .. - } => { - mapper.map_use(rd); - map_mem(mapper, mem); - } - &mut Inst::LoadMultiple64 { .. } => { - // This instruction accesses all registers between rt and rt2, - // so it cannot be remapped. But this does not matter since - // the instruction is only ever used after register allocation. - unreachable!(); - } - &mut Inst::StoreMultiple64 { .. } => { - // This instruction accesses all registers between rt and rt2, - // so it cannot be remapped. But this does not matter since - // the instruction is only ever used after register allocation. - unreachable!(); - } - - &mut Inst::Mov64 { - ref mut rd, - ref mut rm, - } => { - mapper.map_def(rd); - mapper.map_use(rm); - } - &mut Inst::Mov32 { - ref mut rd, - ref mut rm, - } => { - mapper.map_def(rd); - mapper.map_use(rm); - } - &mut Inst::Mov32Imm { ref mut rd, .. } => { - mapper.map_def(rd); - } - &mut Inst::Mov32SImm16 { ref mut rd, .. } => { - mapper.map_def(rd); - } - &mut Inst::Mov64SImm16 { ref mut rd, .. } => { - mapper.map_def(rd); - } - &mut Inst::Mov64SImm32 { ref mut rd, .. } => { - mapper.map_def(rd); - } - &mut Inst::Mov64UImm16Shifted { ref mut rd, .. } => { - mapper.map_def(rd); - } - &mut Inst::Mov64UImm32Shifted { ref mut rd, .. } => { - mapper.map_def(rd); - } - &mut Inst::Insert64UImm16Shifted { ref mut rd, .. 
} => { - mapper.map_mod(rd); - } - &mut Inst::Insert64UImm32Shifted { ref mut rd, .. } => { - mapper.map_mod(rd); - } - &mut Inst::CMov64 { - ref mut rd, - ref mut rm, - .. - } => { - mapper.map_mod(rd); - mapper.map_use(rm); - } - &mut Inst::CMov32 { - ref mut rd, - ref mut rm, - .. - } => { - mapper.map_mod(rd); - mapper.map_use(rm); - } - &mut Inst::CMov32SImm16 { ref mut rd, .. } => { - mapper.map_mod(rd); - } - &mut Inst::CMov64SImm16 { ref mut rd, .. } => { - mapper.map_mod(rd); - } - &mut Inst::FpuMove32 { - ref mut rd, - ref mut rn, - } => { - mapper.map_def(rd); - mapper.map_use(rn); - } - &mut Inst::FpuMove64 { - ref mut rd, - ref mut rn, - } => { - mapper.map_def(rd); - mapper.map_use(rn); - } - &mut Inst::FpuCMov64 { - ref mut rd, - ref mut rm, - .. - } => { - mapper.map_mod(rd); - mapper.map_use(rm); - } - &mut Inst::FpuCMov32 { - ref mut rd, - ref mut rm, - .. - } => { - mapper.map_mod(rd); - mapper.map_use(rm); - } - &mut Inst::MovToFpr { - ref mut rd, - ref mut rn, - } => { - mapper.map_def(rd); - mapper.map_use(rn); - } - &mut Inst::MovFromFpr { - ref mut rd, - ref mut rn, - } => { - mapper.map_def(rd); - mapper.map_use(rn); - } - &mut Inst::FpuRR { - ref mut rd, - ref mut rn, - .. - } => { - mapper.map_def(rd); - mapper.map_use(rn); - } - &mut Inst::FpuRRR { - ref mut rd, - ref mut rm, - .. - } => { - mapper.map_mod(rd); - mapper.map_use(rm); - } - &mut Inst::FpuRRRR { - ref mut rd, - ref mut rn, - ref mut rm, - .. - } => { - mapper.map_mod(rd); - mapper.map_use(rn); - mapper.map_use(rm); - } - &mut Inst::FpuCopysign { - ref mut rd, - ref mut rn, - ref mut rm, - .. - } => { - mapper.map_def(rd); - mapper.map_use(rn); - mapper.map_use(rm); - } - &mut Inst::FpuCmp32 { - ref mut rn, - ref mut rm, - } => { - mapper.map_use(rn); - mapper.map_use(rm); - } - &mut Inst::FpuCmp64 { - ref mut rn, - ref mut rm, - } => { - mapper.map_use(rn); - mapper.map_use(rm); - } - &mut Inst::FpuLoad32 { - ref mut rd, - ref mut mem, - .. - } => { - mapper.map_def(rd); - map_mem(mapper, mem); - } - &mut Inst::FpuLoad64 { - ref mut rd, - ref mut mem, - .. - } => { - mapper.map_def(rd); - map_mem(mapper, mem); - } - &mut Inst::FpuStore32 { - ref mut rd, - ref mut mem, - .. - } => { - mapper.map_use(rd); - map_mem(mapper, mem); - } - &mut Inst::FpuStore64 { - ref mut rd, - ref mut mem, - .. - } => { - mapper.map_use(rd); - map_mem(mapper, mem); - } - &mut Inst::FpuLoadRev32 { - ref mut rd, - ref mut mem, - .. - } => { - mapper.map_def(rd); - map_mem(mapper, mem); - } - &mut Inst::FpuLoadRev64 { - ref mut rd, - ref mut mem, - .. - } => { - mapper.map_def(rd); - map_mem(mapper, mem); - } - &mut Inst::FpuStoreRev32 { - ref mut rd, - ref mut mem, - .. - } => { - mapper.map_use(rd); - map_mem(mapper, mem); - } - &mut Inst::FpuStoreRev64 { - ref mut rd, - ref mut mem, - .. - } => { - mapper.map_use(rd); - map_mem(mapper, mem); - } - &mut Inst::LoadFpuConst32 { ref mut rd, .. } => { - mapper.map_def(rd); - } - &mut Inst::LoadFpuConst64 { ref mut rd, .. } => { - mapper.map_def(rd); - } - &mut Inst::FpuToInt { - ref mut rd, - ref mut rn, - .. - } => { - mapper.map_def(rd); - mapper.map_use(rn); - } - &mut Inst::IntToFpu { - ref mut rd, - ref mut rn, - .. - } => { - mapper.map_def(rd); - mapper.map_use(rn); - } - &mut Inst::FpuRound { - ref mut rd, - ref mut rn, - .. - } => { - mapper.map_def(rd); - mapper.map_use(rn); - } - &mut Inst::FpuVecRRR { - ref mut rd, - ref mut rn, - ref mut rm, - .. 
- } => { - mapper.map_def(rd); - mapper.map_use(rn); - mapper.map_use(rm); - } - &mut Inst::Extend { - ref mut rd, - ref mut rn, - .. - } => { - mapper.map_def(rd); - mapper.map_use(rn); - } - &mut Inst::Call { - ref mut link, - ref mut info, - } => { - mapper.map_def(link); - for r in info.uses.iter_mut() { - mapper.map_use(r); - } - for r in info.defs.iter_mut() { - mapper.map_def(r); - } - } - &mut Inst::CallInd { - ref mut link, - ref mut info, - .. - } => { - mapper.map_def(link); - for r in info.uses.iter_mut() { - mapper.map_use(r); - } - for r in info.defs.iter_mut() { - mapper.map_def(r); - } - mapper.map_use(&mut info.rn); - } - &mut Inst::Ret { .. } => {} - &mut Inst::EpiloguePlaceholder => {} - &mut Inst::Jump { .. } => {} - &mut Inst::IndirectBr { ref mut rn, .. } => { - mapper.map_use(rn); - } - &mut Inst::CondBr { .. } | &mut Inst::OneWayCondBr { .. } => {} - &mut Inst::Debugtrap | &mut Inst::Trap { .. } | &mut Inst::TrapIf { .. } => {} - &mut Inst::Nop0 | &mut Inst::Nop2 => {} - &mut Inst::JTSequence { ref mut ridx, .. } => { - mapper.map_use(ridx); - } - &mut Inst::LoadExtNameFar { ref mut rd, .. } => { - mapper.map_def(rd); - } - &mut Inst::LoadAddr { - ref mut rd, - ref mut mem, - } => { - mapper.map_def(rd); - map_mem(mapper, mem); - } - &mut Inst::Loop { ref mut body, .. } => { - for inst in body.iter_mut() { - s390x_map_regs(inst, mapper); - } - } - &mut Inst::CondBreak { .. } => {} - &mut Inst::VirtualSPOffsetAdj { .. } => {} - &mut Inst::ValueLabelMarker { ref mut reg, .. } => { - mapper.map_use(reg); - } - &mut Inst::Unwind { .. } => {} } } @@ -1462,12 +711,8 @@ pub fn s390x_map_regs<RM: RegMapper>(inst: &mut Inst, mapper: &RM) { impl MachInst for Inst { type LabelUse = LabelUse; - fn get_regs(&self, collector: &mut RegUsageCollector) { - s390x_get_regs(self, collector) - } - - fn map_regs<RUM: RegUsageMapper>(&mut self, mapper: &RUM) { - s390x_map_regs(self, mapper); + fn get_operands<F: Fn(VReg) -> VReg>(&self, collector: &mut OperandCollector<'_, F>) { + s390x_get_operands(self, collector); } fn is_move(&self) -> Option<(Writable<Reg>, Reg)> { @@ -1505,18 +750,16 @@ impl MachInst for Inst { } } - fn stack_op_info(&self) -> Option<MachInstStackOpInfo> { + fn is_safepoint(&self) -> bool { match self { - &Inst::VirtualSPOffsetAdj { offset } => Some(MachInstStackOpInfo::NomSPAdj(offset)), - &Inst::Store64 { - rd, - mem: MemArg::NominalSPOffset { off }, - } => Some(MachInstStackOpInfo::StoreNomSPOff(rd, off)), - &Inst::Load64 { - rd, - mem: MemArg::NominalSPOffset { off }, - } => Some(MachInstStackOpInfo::LoadNomSPOff(rd.to_reg(), off)), - _ => None, + &Inst::Call { .. } + | &Inst::CallInd { .. } + | &Inst::Trap { .. } + | &Inst::TrapIf { .. } + | &Inst::CmpTrapRR { .. } + | &Inst::CmpTrapRSImm16 { .. } + | &Inst::CmpTrapRUImm16 { ..
} => true, + _ => false, } } @@ -1575,30 +818,26 @@ impl MachInst for Inst { } } - fn maybe_direct_reload(&self, _reg: VirtualReg, _slot: SpillSlot) -> Option<Inst> { - None - } - fn rc_for_type(ty: Type) -> CodegenResult<(&'static [RegClass], &'static [Type])> { match ty { - types::I8 => Ok((&[RegClass::I64], &[types::I8])), - types::I16 => Ok((&[RegClass::I64], &[types::I16])), - types::I32 => Ok((&[RegClass::I64], &[types::I32])), - types::I64 => Ok((&[RegClass::I64], &[types::I64])), - types::B1 => Ok((&[RegClass::I64], &[types::B1])), - types::B8 => Ok((&[RegClass::I64], &[types::B8])), - types::B16 => Ok((&[RegClass::I64], &[types::B16])), - types::B32 => Ok((&[RegClass::I64], &[types::B32])), - types::B64 => Ok((&[RegClass::I64], &[types::B64])), + types::I8 => Ok((&[RegClass::Int], &[types::I8])), + types::I16 => Ok((&[RegClass::Int], &[types::I16])), + types::I32 => Ok((&[RegClass::Int], &[types::I32])), + types::I64 => Ok((&[RegClass::Int], &[types::I64])), + types::B1 => Ok((&[RegClass::Int], &[types::B1])), + types::B8 => Ok((&[RegClass::Int], &[types::B8])), + types::B16 => Ok((&[RegClass::Int], &[types::B16])), + types::B32 => Ok((&[RegClass::Int], &[types::B32])), + types::B64 => Ok((&[RegClass::Int], &[types::B64])), types::R32 => panic!("32-bit reftype pointer should never be seen on s390x"), - types::R64 => Ok((&[RegClass::I64], &[types::R64])), - types::F32 => Ok((&[RegClass::F64], &[types::F32])), - types::F64 => Ok((&[RegClass::F64], &[types::F64])), - types::I128 => Ok((&[RegClass::I64, RegClass::I64], &[types::I64, types::I64])), - types::B128 => Ok((&[RegClass::I64, RegClass::I64], &[types::B64, types::B64])), + types::R64 => Ok((&[RegClass::Int], &[types::R64])), + types::F32 => Ok((&[RegClass::Float], &[types::F32])), + types::F64 => Ok((&[RegClass::Float], &[types::F64])), + types::I128 => Ok((&[RegClass::Int, RegClass::Int], &[types::I64, types::I64])), + types::B128 => Ok((&[RegClass::Int, RegClass::Int], &[types::B64, types::B64])), // FIXME: We don't really have IFLAGS, but need to allow it here // for now to support the SelectifSpectreGuard instruction.
- types::IFLAGS => Ok((&[RegClass::I64], &[types::I64])), + types::IFLAGS => Ok((&[RegClass::Int], &[types::I64])), _ => Err(CodegenError::Unsupported(format!( "Unexpected SSA-value type: {}", ty @@ -1606,6 +845,13 @@ impl MachInst for Inst { } } + fn canonical_type_for_rc(rc: RegClass) -> Type { + match rc { + RegClass::Int => types::I64, + RegClass::Float => types::F64, + } + } + fn gen_jump(target: MachLabel) -> Inst { Inst::Jump { dest: target } } @@ -1622,18 +868,11 @@ impl MachInst for Inst { } fn ref_type_regclass(_: &settings::Flags) -> RegClass { - RegClass::I64 + RegClass::Int } - fn gen_value_label_marker(label: ValueLabel, reg: Reg) -> Self { - Inst::ValueLabelMarker { label, reg } - } - - fn defines_value_label(&self) -> Option<(ValueLabel, Reg)> { - match self { - Inst::ValueLabelMarker { label, reg } => Some((*label, *reg)), - _ => None, - } + fn gen_dummy_use(reg: Reg) -> Inst { + Inst::DummyUse { reg } } } @@ -1642,7 +881,6 @@ impl MachInst for Inst { fn mem_finalize_for_show( mem: &MemArg, - mb_rru: Option<&RealRegUniverse>, state: &EmitState, have_d12: bool, have_d20: bool, @@ -1652,7 +890,9 @@ fn mem_finalize_for_show( let (mem_insts, mem) = mem_finalize(mem, state, have_d12, have_d20, have_pcrel, have_index); let mut mem_str = mem_insts .into_iter() - .map(|inst| inst.show_rru(mb_rru)) + .map(|inst| { + inst.print_with_state(&mut EmitState::default(), &mut AllocationConsumer::new(&[])) + }) .collect::<Vec<_>>() .join(" ; "); if !mem_str.is_empty() { @@ -1662,18 +902,25 @@ fn mem_finalize_for_show( (mem_str, mem) } -impl PrettyPrint for Inst { - fn show_rru(&self, mb_rru: Option<&RealRegUniverse>) -> String { - self.pretty_print(mb_rru, &mut EmitState::default()) - } -} - impl Inst { - fn print_with_state(&self, mb_rru: Option<&RealRegUniverse>, state: &mut EmitState) -> String { + fn print_with_state( + &self, + state: &mut EmitState, + allocs: &mut AllocationConsumer<'_>, + ) -> String { + // N.B.: order of consumption of `allocs` must match the order + // in `s390x_get_operands()`.
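// Editorial sketch (not part of this patch): to make the ordering invariant
// above concrete, a pretty-printer arm must pull from `allocs` in exactly the
// order the matching `s390x_get_operands()` arm collected operands; consuming
// out of order silently pairs every later operand with the wrong allocation.
// For a three-operand ALU op collected as def(rd), use(rn), use(rm), a
// hypothetical stand-alone helper would look like:
//
//     fn print_alu_rrr(
//         rd: Writable<Reg>,
//         rn: Reg,
//         rm: Reg,
//         allocs: &mut AllocationConsumer<'_>,
//     ) -> String {
//         let rd = allocs.next_writable(rd); // 1st: the def
//         let rn = allocs.next(rn);          // 2nd: first use
//         let rm = allocs.next(rm);          // 3rd: second use
//         format!("agrk {}, {}, {}", show_reg(rd.to_reg()), show_reg(rn), show_reg(rm))
//     }
//
// `AllocationConsumer::next`/`next_writable` and `show_reg` are the same
// helpers this patch uses below; only `print_alu_rrr` itself is invented.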
+ + let mut empty_allocs = AllocationConsumer::new(&[]); + match self { &Inst::Nop0 => "nop-zero-len".to_string(), &Inst::Nop2 => "nop".to_string(), &Inst::AluRRR { alu_op, rd, rn, rm } => { + let rd = allocs.next_writable(rd); + let rn = allocs.next(rn); + let rm = allocs.next(rm); + let (op, have_rr) = match alu_op { ALUOp::Add32 => ("ark", true), ALUOp::Add64 => ("agrk", true), @@ -1701,11 +948,11 @@ impl Inst { }; if have_rr && rd.to_reg() == rn { let inst = Inst::AluRR { alu_op, rd, rm }; - return inst.print_with_state(mb_rru, state); + return inst.print_with_state(state, &mut empty_allocs); } - let rd = rd.to_reg().show_rru(mb_rru); - let rn = rn.show_rru(mb_rru); - let rm = rm.show_rru(mb_rru); + let rd = pretty_print_reg(rd.to_reg(), &mut empty_allocs); + let rn = pretty_print_reg(rn, &mut empty_allocs); + let rm = pretty_print_reg(rm, &mut empty_allocs); format!("{} {}, {}, {}", op, rd, rn, rm) } &Inst::AluRRSImm16 { @@ -1714,17 +961,20 @@ impl Inst { rn, imm, } => { + let rd = allocs.next_writable(rd); + let rn = allocs.next(rn); + if rd.to_reg() == rn { let inst = Inst::AluRSImm16 { alu_op, rd, imm }; - return inst.print_with_state(mb_rru, state); + return inst.print_with_state(state, &mut empty_allocs); } let op = match alu_op { ALUOp::Add32 => "ahik", ALUOp::Add64 => "aghik", _ => unreachable!(), }; - let rd = rd.to_reg().show_rru(mb_rru); - let rn = rn.show_rru(mb_rru); + let rd = pretty_print_reg(rd.to_reg(), &mut empty_allocs); + let rn = pretty_print_reg(rn, &mut empty_allocs); format!("{} {}, {}, {}", op, rd, rn, imm) } &Inst::AluRR { alu_op, rd, rm } => { @@ -1752,8 +1002,8 @@ impl Inst { ALUOp::Xor64 => "xgr", _ => unreachable!(), }; - let rd = rd.to_reg().show_rru(mb_rru); - let rm = rm.show_rru(mb_rru); + let rd = pretty_print_reg(rd.to_reg(), allocs); + let rm = pretty_print_reg(rm, allocs); format!("{} {}, {}", op, rd, rm) } &Inst::AluRX { @@ -1792,24 +1042,23 @@ impl Inst { _ => unreachable!(), }; + let rd = pretty_print_reg(rd.to_reg(), allocs); + let mem = mem.with_allocs(allocs); let (mem_str, mem) = mem_finalize_for_show( - mem, - mb_rru, + &mem, state, opcode_rx.is_some(), opcode_rxy.is_some(), false, true, ); - let op = match &mem { &MemArg::BXD12 { .. } => opcode_rx, &MemArg::BXD20 { .. 
} => opcode_rxy, _ => unreachable!(), }; + let mem = mem.pretty_print_default(); - let rd = rd.to_reg().show_rru(mb_rru); - let mem = mem.show_rru(mb_rru); format!("{}{} {}, {}", mem_str, op.unwrap(), rd, mem) } &Inst::AluRSImm16 { alu_op, rd, imm } => { @@ -1820,7 +1069,7 @@ impl Inst { ALUOp::Mul64 => "mghi", _ => unreachable!(), }; - let rd = rd.to_reg().show_rru(mb_rru); + let rd = pretty_print_reg(rd.to_reg(), allocs); format!("{} {}, {}", op, rd, imm) } &Inst::AluRSImm32 { alu_op, rd, imm } => { @@ -1831,7 +1080,7 @@ impl Inst { ALUOp::Mul64 => "msgfi", _ => unreachable!(), }; - let rd = rd.to_reg().show_rru(mb_rru); + let rd = pretty_print_reg(rd.to_reg(), allocs); format!("{} {}, {}", op, rd, imm) } &Inst::AluRUImm32 { alu_op, rd, imm } => { @@ -1842,7 +1091,7 @@ impl Inst { ALUOp::SubLogical64 => "slgfi", _ => unreachable!(), }; - let rd = rd.to_reg().show_rru(mb_rru); + let rd = pretty_print_reg(rd.to_reg(), allocs); format!("{} {}, {}", op, rd, imm) } &Inst::AluRUImm16Shifted { alu_op, rd, imm } => { @@ -1861,7 +1110,7 @@ impl Inst { (ALUOp::Orr64, 3) => "oihh", _ => unreachable!(), }; - let rd = rd.to_reg().show_rru(mb_rru); + let rd = pretty_print_reg(rd.to_reg(), allocs); format!("{} {}, {}", op, rd, imm.bits) } &Inst::AluRUImm32Shifted { alu_op, rd, imm } => { @@ -1877,50 +1126,57 @@ impl Inst { (ALUOp::Xor64, 1) => "xihf", _ => unreachable!(), }; - let rd = rd.to_reg().show_rru(mb_rru); + let rd = pretty_print_reg(rd.to_reg(), allocs); format!("{} {}, {}", op, rd, imm.bits) } &Inst::SMulWide { rn, rm } => { let op = "mgrk"; - let rd = gpr(0).show_rru(mb_rru); - let rn = rn.show_rru(mb_rru); - let rm = rm.show_rru(mb_rru); + let rn = pretty_print_reg(rn, allocs); + let rm = pretty_print_reg(rm, allocs); + let rd = pretty_print_reg(gpr(0), allocs); + let _r1 = allocs.next(gpr(1)); format!("{} {}, {}, {}", op, rd, rn, rm) } &Inst::UMulWide { rn } => { let op = "mlgr"; - let rd = gpr(0).show_rru(mb_rru); - let rn = rn.show_rru(mb_rru); + let rn = pretty_print_reg(rn, allocs); + let rd = pretty_print_reg(gpr(0), allocs); + let _r1 = allocs.next(gpr(1)); format!("{} {}, {}", op, rd, rn) } &Inst::SDivMod32 { rn, .. } => { let op = "dsgfr"; - let rd = gpr(0).show_rru(mb_rru); - let rn = rn.show_rru(mb_rru); + let rn = pretty_print_reg(rn, allocs); + let rd = pretty_print_reg(gpr(0), allocs); + let _r1 = allocs.next(gpr(1)); format!("{} {}, {}", op, rd, rn) } &Inst::SDivMod64 { rn, .. } => { let op = "dsgr"; - let rd = gpr(0).show_rru(mb_rru); - let rn = rn.show_rru(mb_rru); + let rn = pretty_print_reg(rn, allocs); + let rd = pretty_print_reg(gpr(0), allocs); + let _r1 = allocs.next(gpr(1)); format!("{} {}, {}", op, rd, rn) } &Inst::UDivMod32 { rn, .. } => { let op = "dlr"; - let rd = gpr(0).show_rru(mb_rru); - let rn = rn.show_rru(mb_rru); + let rn = pretty_print_reg(rn, allocs); + let rd = pretty_print_reg(gpr(0), allocs); + let _r1 = allocs.next(gpr(1)); format!("{} {}, {}", op, rd, rn) } &Inst::UDivMod64 { rn, .. 
} => { let op = "dlgr"; - let rd = gpr(0).show_rru(mb_rru); - let rn = rn.show_rru(mb_rru); + let rn = pretty_print_reg(rn, allocs); + let rd = pretty_print_reg(gpr(0), allocs); + let _r1 = allocs.next(gpr(1)); format!("{} {}, {}", op, rd, rn) } &Inst::Flogr { rn } => { let op = "flogr"; - let rd = gpr(0).show_rru(mb_rru); - let rn = rn.show_rru(mb_rru); + let rn = pretty_print_reg(rn, allocs); + let rd = pretty_print_reg(gpr(0), allocs); + let _r1 = allocs.next(gpr(1)); format!("{} {}, {}", op, rd, rn) } &Inst::ShiftRR { @@ -1940,10 +1196,10 @@ impl Inst { ShiftOp::AShR32 => "srak", ShiftOp::AShR64 => "srag", }; - let rd = rd.to_reg().show_rru(mb_rru); - let rn = rn.show_rru(mb_rru); + let rd = pretty_print_reg(rd.to_reg(), allocs); + let rn = pretty_print_reg(rn, allocs); let shift_reg = if shift_reg != zero_reg() { - format!("({})", shift_reg.show_rru(mb_rru)) + format!("({})", pretty_print_reg(shift_reg, allocs)) } else { "".to_string() }; @@ -1963,8 +1219,8 @@ impl Inst { RxSBGOp::Or => "rosbg", RxSBGOp::Xor => "rxsbg", }; - let rd = rd.to_reg().show_rru(mb_rru); - let rn = rn.show_rru(mb_rru); + let rd = pretty_print_reg(rd.to_reg(), allocs); + let rn = pretty_print_reg(rn, allocs); format!( "{} {}, {}, {}, {}, {}", op, @@ -1989,8 +1245,8 @@ impl Inst { RxSBGOp::Xor => "rxsbg", _ => unreachable!(), }; - let rd = rd.show_rru(mb_rru); - let rn = rn.show_rru(mb_rru); + let rd = pretty_print_reg(rd, allocs); + let rn = pretty_print_reg(rn, allocs); format!( "{} {}, {}, {}, {}, {}", op, @@ -2014,8 +1270,8 @@ impl Inst { UnaryOp::BSwap32 => ("lrvr", ""), UnaryOp::BSwap64 => ("lrvgr", ""), }; - let rd = rd.to_reg().show_rru(mb_rru); - let rn = rn.show_rru(mb_rru); + let rd = pretty_print_reg(rd.to_reg(), allocs); + let rn = pretty_print_reg(rn, allocs); format!("{} {}, {}{}", op, rd, rn, extra) } &Inst::CmpRR { op, rn, rm } => { @@ -2028,8 +1284,8 @@ impl Inst { CmpOp::CmpL64Ext32 => "clgfr", _ => unreachable!(), }; - let rn = rn.show_rru(mb_rru); - let rm = rm.show_rru(mb_rru); + let rn = pretty_print_reg(rn, allocs); + let rm = pretty_print_reg(rm, allocs); format!("{} {}, {}", op, rn, rm) } &Inst::CmpRX { op, rn, ref mem } => { @@ -2046,25 +1302,24 @@ impl Inst { CmpOp::CmpL64Ext32 => (None, Some("clgf"), Some("clgfrl")), }; + let rn = pretty_print_reg(rn, allocs); + let mem = mem.with_allocs(allocs); let (mem_str, mem) = mem_finalize_for_show( - mem, - mb_rru, + &mem, state, opcode_rx.is_some(), opcode_rxy.is_some(), opcode_ril.is_some(), true, ); - let op = match &mem { &MemArg::BXD12 { .. } => opcode_rx, &MemArg::BXD20 { .. } => opcode_rxy, &MemArg::Label { .. } | &MemArg::Symbol { .. 
} => opcode_ril, _ => unreachable!(), }; + let mem = mem.pretty_print_default(); - let rn = rn.show_rru(mb_rru); - let mem = mem.show_rru(mb_rru); format!("{}{} {}, {}", mem_str, op.unwrap(), rn, mem) } &Inst::CmpRSImm16 { op, rn, imm } => { @@ -2073,7 +1328,7 @@ impl Inst { CmpOp::CmpS64 => "cghi", _ => unreachable!(), }; - let rn = rn.show_rru(mb_rru); + let rn = pretty_print_reg(rn, allocs); format!("{} {}, {}", op, rn, imm) } &Inst::CmpRSImm32 { op, rn, imm } => { @@ -2082,7 +1337,7 @@ impl Inst { CmpOp::CmpS64 => "cgfi", _ => unreachable!(), }; - let rn = rn.show_rru(mb_rru); + let rn = pretty_print_reg(rn, allocs); format!("{} {}, {}", op, rn, imm) } &Inst::CmpRUImm32 { op, rn, imm } => { @@ -2091,7 +1346,7 @@ impl Inst { CmpOp::CmpL64 => "clgfi", _ => unreachable!(), }; - let rn = rn.show_rru(mb_rru); + let rn = pretty_print_reg(rn, allocs); format!("{} {}, {}", op, rn, imm) } &Inst::CmpTrapRR { @@ -2104,9 +1359,9 @@ impl Inst { CmpOp::CmpL64 => "clgrt", _ => unreachable!(), }; - let rn = rn.show_rru(mb_rru); - let rm = rm.show_rru(mb_rru); - let cond = cond.show_rru(mb_rru); + let rn = pretty_print_reg(rn, allocs); + let rm = pretty_print_reg(rm, allocs); + let cond = cond.pretty_print_default(); format!("{}{} {}, {}", op, cond, rn, rm) } &Inst::CmpTrapRSImm16 { @@ -2117,8 +1372,8 @@ impl Inst { CmpOp::CmpS64 => "cgit", _ => unreachable!(), }; - let rn = rn.show_rru(mb_rru); - let cond = cond.show_rru(mb_rru); + let rn = pretty_print_reg(rn, allocs); + let cond = cond.pretty_print_default(); format!("{}{} {}, {}", op, cond, rn, imm) } &Inst::CmpTrapRUImm16 { @@ -2129,8 +1384,8 @@ impl Inst { CmpOp::CmpL64 => "clgit", _ => unreachable!(), }; - let rn = rn.show_rru(mb_rru); - let cond = cond.show_rru(mb_rru); + let rn = pretty_print_reg(rn, allocs); + let cond = cond.pretty_print_default(); format!("{}{} {}, {}", op, cond, rn, imm) } &Inst::AtomicRmw { @@ -2153,12 +1408,11 @@ impl Inst { _ => unreachable!(), }; - let (mem_str, mem) = - mem_finalize_for_show(mem, mb_rru, state, false, true, false, false); - - let rd = rd.to_reg().show_rru(mb_rru); - let rn = rn.show_rru(mb_rru); - let mem = mem.show_rru(mb_rru); + let rd = pretty_print_reg(rd.to_reg(), allocs); + let rn = pretty_print_reg(rn, allocs); + let mem = mem.with_allocs(allocs); + let (mem_str, mem) = mem_finalize_for_show(&mem, state, false, true, false, false); + let mem = mem.pretty_print_default(); format!("{}{} {}, {}, {}", mem_str, op, rd, rn, mem) } &Inst::AtomicCas32 { rd, rn, ref mem } | &Inst::AtomicCas64 { rd, rn, ref mem } => { @@ -2168,25 +1422,24 @@ impl Inst { _ => unreachable!(), }; + let rd = pretty_print_reg(rd.to_reg(), allocs); + let rn = pretty_print_reg(rn, allocs); + let mem = mem.with_allocs(allocs); let (mem_str, mem) = mem_finalize_for_show( - mem, - mb_rru, + &mem, state, opcode_rs.is_some(), opcode_rsy.is_some(), false, false, ); - let op = match &mem { &MemArg::BXD12 { .. } => opcode_rs, &MemArg::BXD20 { .. 
} => opcode_rsy, _ => unreachable!(), }; + let mem = mem.pretty_print_default(); - let rd = rd.to_reg().show_rru(mb_rru); - let rn = rn.show_rru(mb_rru); - let mem = mem.show_rru(mb_rru); format!("{}{} {}, {}, {}", mem_str, op.unwrap(), rd, rn, mem) } &Inst::Fence => "bcr 14, 0".to_string(), @@ -2228,38 +1481,35 @@ impl Inst { _ => unreachable!(), }; + let rd = pretty_print_reg(rd.to_reg(), allocs); + let mem = mem.with_allocs(allocs); let (mem_str, mem) = mem_finalize_for_show( - mem, - mb_rru, + &mem, state, opcode_rx.is_some(), opcode_rxy.is_some(), opcode_ril.is_some(), true, ); - let op = match &mem { &MemArg::BXD12 { .. } => opcode_rx, &MemArg::BXD20 { .. } => opcode_rxy, &MemArg::Label { .. } | &MemArg::Symbol { .. } => opcode_ril, _ => unreachable!(), }; - - let rd = rd.to_reg().show_rru(mb_rru); - let mem = mem.show_rru(mb_rru); + let mem = mem.pretty_print_default(); format!("{}{} {}, {}", mem_str, op.unwrap(), rd, mem) } &Inst::FpuLoadRev32 { rd, ref mem } | &Inst::FpuLoadRev64 { rd, ref mem } => { - let (mem_str, mem) = - mem_finalize_for_show(mem, mb_rru, state, true, false, false, true); - + let rd = pretty_print_reg(rd.to_reg(), allocs); + let mem = mem.with_allocs(allocs); + let (mem_str, mem) = mem_finalize_for_show(&mem, state, true, false, false, true); let op = match self { &Inst::FpuLoadRev32 { .. } => "vlebrf", &Inst::FpuLoadRev64 { .. } => "vlebrg", _ => unreachable!(), }; - let rd = rd.to_reg().show_rru(mb_rru); - let mem = mem.show_rru(mb_rru); + let mem = mem.pretty_print_default(); format!("{}{} {}, {}, 0", mem_str, op, rd, mem) } &Inst::Store8 { rd, ref mem } @@ -2284,111 +1534,110 @@ impl Inst { _ => unreachable!(), }; + let rd = pretty_print_reg(rd, allocs); + let mem = mem.with_allocs(allocs); let (mem_str, mem) = mem_finalize_for_show( - mem, - mb_rru, + &mem, state, opcode_rx.is_some(), opcode_rxy.is_some(), opcode_ril.is_some(), true, ); - let op = match &mem { &MemArg::BXD12 { .. } => opcode_rx, &MemArg::BXD20 { .. } => opcode_rxy, &MemArg::Label { .. } | &MemArg::Symbol { .. } => opcode_ril, _ => unreachable!(), }; + let mem = mem.pretty_print_default(); - let rd = rd.show_rru(mb_rru); - let mem = mem.show_rru(mb_rru); format!("{}{} {}, {}", mem_str, op.unwrap(), rd, mem) } &Inst::StoreImm8 { imm, ref mem } => { - let (mem_str, mem) = - mem_finalize_for_show(mem, mb_rru, state, true, true, false, false); + let mem = mem.with_allocs(allocs); + let (mem_str, mem) = mem_finalize_for_show(&mem, state, true, true, false, false); let op = match &mem { &MemArg::BXD12 { .. } => "mvi", &MemArg::BXD20 { .. } => "mviy", _ => unreachable!(), }; + let mem = mem.pretty_print_default(); - let mem = mem.show_rru(mb_rru); format!("{}{} {}, {}", mem_str, op, mem, imm) } &Inst::StoreImm16 { imm, ref mem } | &Inst::StoreImm32SExt16 { imm, ref mem } | &Inst::StoreImm64SExt16 { imm, ref mem } => { - let (mem_str, mem) = - mem_finalize_for_show(mem, mb_rru, state, false, true, false, false); + let mem = mem.with_allocs(allocs); + let (mem_str, mem) = mem_finalize_for_show(&mem, state, false, true, false, false); let op = match self { &Inst::StoreImm16 { .. } => "mvhhi", &Inst::StoreImm32SExt16 { .. } => "mvhi", &Inst::StoreImm64SExt16 { .. 
} => "mvghi", _ => unreachable!(), }; + let mem = mem.pretty_print_default(); - let mem = mem.show_rru(mb_rru); format!("{}{} {}, {}", mem_str, op, mem, imm) } &Inst::FpuStoreRev32 { rd, ref mem } | &Inst::FpuStoreRev64 { rd, ref mem } => { - let (mem_str, mem) = - mem_finalize_for_show(mem, mb_rru, state, true, false, false, true); - + let rd = pretty_print_reg(rd, allocs); + let mem = mem.with_allocs(allocs); + let (mem_str, mem) = mem_finalize_for_show(&mem, state, true, false, false, true); let op = match self { &Inst::FpuStoreRev32 { .. } => "vstebrf", &Inst::FpuStoreRev64 { .. } => "vstebrg", _ => unreachable!(), }; - let rd = rd.show_rru(mb_rru); - let mem = mem.show_rru(mb_rru); + let mem = mem.pretty_print_default(); + format!("{}{} {}, {}, 0", mem_str, op, rd, mem) } &Inst::LoadMultiple64 { rt, rt2, ref mem } => { - let (mem_str, mem) = - mem_finalize_for_show(mem, mb_rru, state, false, true, false, false); - let rt = rt.show_rru(mb_rru); - let rt2 = rt2.show_rru(mb_rru); - let mem = mem.show_rru(mb_rru); + let mem = mem.with_allocs(allocs); + let (mem_str, mem) = mem_finalize_for_show(&mem, state, false, true, false, false); + let rt = pretty_print_reg(rt.to_reg(), &mut empty_allocs); + let rt2 = pretty_print_reg(rt2.to_reg(), &mut empty_allocs); + let mem = mem.pretty_print_default(); format!("{}lmg {}, {}, {}", mem_str, rt, rt2, mem) } &Inst::StoreMultiple64 { rt, rt2, ref mem } => { - let (mem_str, mem) = - mem_finalize_for_show(mem, mb_rru, state, false, true, false, false); - let rt = rt.show_rru(mb_rru); - let rt2 = rt2.show_rru(mb_rru); - let mem = mem.show_rru(mb_rru); + let mem = mem.with_allocs(allocs); + let (mem_str, mem) = mem_finalize_for_show(&mem, state, false, true, false, false); + let rt = pretty_print_reg(rt, &mut empty_allocs); + let rt2 = pretty_print_reg(rt2, &mut empty_allocs); + let mem = mem.pretty_print_default(); format!("{}stmg {}, {}, {}", mem_str, rt, rt2, mem) } &Inst::Mov64 { rd, rm } => { - let rd = rd.to_reg().show_rru(mb_rru); - let rm = rm.show_rru(mb_rru); + let rd = pretty_print_reg(rd.to_reg(), allocs); + let rm = pretty_print_reg(rm, allocs); format!("lgr {}, {}", rd, rm) } &Inst::Mov32 { rd, rm } => { - let rd = rd.to_reg().show_rru(mb_rru); - let rm = rm.show_rru(mb_rru); + let rd = pretty_print_reg(rd.to_reg(), allocs); + let rm = pretty_print_reg(rm, allocs); format!("lr {}, {}", rd, rm) } &Inst::Mov32Imm { rd, ref imm } => { - let rd = rd.to_reg().show_rru(mb_rru); + let rd = pretty_print_reg(rd.to_reg(), allocs); format!("iilf {}, {}", rd, imm) } &Inst::Mov32SImm16 { rd, ref imm } => { - let rd = rd.to_reg().show_rru(mb_rru); + let rd = pretty_print_reg(rd.to_reg(), allocs); format!("lhi {}, {}", rd, imm) } &Inst::Mov64SImm16 { rd, ref imm } => { - let rd = rd.to_reg().show_rru(mb_rru); + let rd = pretty_print_reg(rd.to_reg(), allocs); format!("lghi {}, {}", rd, imm) } &Inst::Mov64SImm32 { rd, ref imm } => { - let rd = rd.to_reg().show_rru(mb_rru); + let rd = pretty_print_reg(rd.to_reg(), allocs); format!("lgfi {}, {}", rd, imm) } &Inst::Mov64UImm16Shifted { rd, ref imm } => { - let rd = rd.to_reg().show_rru(mb_rru); + let rd = pretty_print_reg(rd.to_reg(), allocs); let op = match imm.shift { 0 => "llill", 1 => "llilh", @@ -2399,7 +1648,7 @@ impl Inst { format!("{} {}, {}", op, rd, imm.bits) } &Inst::Mov64UImm32Shifted { rd, ref imm } => { - let rd = rd.to_reg().show_rru(mb_rru); + let rd = pretty_print_reg(rd.to_reg(), allocs); let op = match imm.shift { 0 => "llilf", 1 => "llihf", @@ -2408,7 +1657,7 @@ impl Inst { format!("{} {}, 
{}", op, rd, imm.bits) } &Inst::Insert64UImm16Shifted { rd, ref imm } => { - let rd = rd.to_reg().show_rru(mb_rru); + let rd = pretty_print_reg(rd.to_reg(), allocs); let op = match imm.shift { 0 => "iill", 1 => "iilh", @@ -2419,7 +1668,7 @@ impl Inst { format!("{} {}, {}", op, rd, imm.bits) } &Inst::Insert64UImm32Shifted { rd, ref imm } => { - let rd = rd.to_reg().show_rru(mb_rru); + let rd = pretty_print_reg(rd.to_reg(), allocs); let op = match imm.shift { 0 => "iilf", 1 => "iihf", @@ -2428,57 +1677,57 @@ impl Inst { format!("{} {}, {}", op, rd, imm.bits) } &Inst::CMov32 { rd, cond, rm } => { - let rd = rd.to_reg().show_rru(mb_rru); - let rm = rm.show_rru(mb_rru); - let cond = cond.show_rru(mb_rru); + let rd = pretty_print_reg(rd.to_reg(), allocs); + let rm = pretty_print_reg(rm, allocs); + let cond = cond.pretty_print_default(); format!("locr{} {}, {}", cond, rd, rm) } &Inst::CMov64 { rd, cond, rm } => { - let rd = rd.to_reg().show_rru(mb_rru); - let rm = rm.show_rru(mb_rru); - let cond = cond.show_rru(mb_rru); + let rd = pretty_print_reg(rd.to_reg(), allocs); + let rm = pretty_print_reg(rm, allocs); + let cond = cond.pretty_print_default(); format!("locgr{} {}, {}", cond, rd, rm) } &Inst::CMov32SImm16 { rd, cond, ref imm } => { - let rd = rd.to_reg().show_rru(mb_rru); - let cond = cond.show_rru(mb_rru); + let rd = pretty_print_reg(rd.to_reg(), allocs); + let cond = cond.pretty_print_default(); format!("lochi{} {}, {}", cond, rd, imm) } &Inst::CMov64SImm16 { rd, cond, ref imm } => { - let rd = rd.to_reg().show_rru(mb_rru); - let cond = cond.show_rru(mb_rru); + let rd = pretty_print_reg(rd.to_reg(), allocs); + let cond = cond.pretty_print_default(); format!("locghi{} {}, {}", cond, rd, imm) } &Inst::FpuMove32 { rd, rn } => { - let rd = rd.to_reg().show_rru(mb_rru); - let rn = rn.show_rru(mb_rru); + let rd = pretty_print_reg(rd.to_reg(), allocs); + let rn = pretty_print_reg(rn, allocs); format!("ler {}, {}", rd, rn) } &Inst::FpuMove64 { rd, rn } => { - let rd = rd.to_reg().show_rru(mb_rru); - let rn = rn.show_rru(mb_rru); + let rd = pretty_print_reg(rd.to_reg(), allocs); + let rn = pretty_print_reg(rn, allocs); format!("ldr {}, {}", rd, rn) } &Inst::FpuCMov32 { rd, cond, rm } => { - let rd = rd.to_reg().show_rru(mb_rru); - let rm = rm.show_rru(mb_rru); - let cond = cond.invert().show_rru(mb_rru); + let rd = pretty_print_reg(rd.to_reg(), allocs); + let rm = pretty_print_reg(rm, allocs); + let cond = cond.invert().pretty_print_default(); format!("j{} 6 ; ler {}, {}", cond, rd, rm) } &Inst::FpuCMov64 { rd, cond, rm } => { - let rd = rd.to_reg().show_rru(mb_rru); - let rm = rm.show_rru(mb_rru); - let cond = cond.invert().show_rru(mb_rru); + let rd = pretty_print_reg(rd.to_reg(), allocs); + let rm = pretty_print_reg(rm, allocs); + let cond = cond.invert().pretty_print_default(); format!("j{} 6 ; ldr {}, {}", cond, rd, rm) } &Inst::MovToFpr { rd, rn } => { - let rd = rd.to_reg().show_rru(mb_rru); - let rn = rn.show_rru(mb_rru); + let rd = pretty_print_reg(rd.to_reg(), allocs); + let rn = pretty_print_reg(rn, allocs); format!("ldgr {}, {}", rd, rn) } &Inst::MovFromFpr { rd, rn } => { - let rd = rd.to_reg().show_rru(mb_rru); - let rn = rn.show_rru(mb_rru); + let rd = pretty_print_reg(rd.to_reg(), allocs); + let rn = pretty_print_reg(rn, allocs); format!("lgdr {}, {}", rd, rn) } &Inst::FpuRR { fpu_op, rd, rn } => { @@ -2494,8 +1743,8 @@ impl Inst { FPUOp1::Cvt32To64 => "ldebr", FPUOp1::Cvt64To32 => "ledbr", }; - let rd = rd.to_reg().show_rru(mb_rru); - let rn = rn.show_rru(mb_rru); + let rd = 
pretty_print_reg(rd.to_reg(), allocs); + let rn = pretty_print_reg(rn, allocs); format!("{} {}, {}", op, rd, rn) } &Inst::FpuRRR { fpu_op, rd, rm } => { @@ -2510,8 +1759,8 @@ impl Inst { FPUOp2::Div64 => "ddbr", _ => unimplemented!(), }; - let rd = rd.to_reg().show_rru(mb_rru); - let rm = rm.show_rru(mb_rru); + let rd = pretty_print_reg(rd.to_reg(), allocs); + let rm = pretty_print_reg(rm, allocs); format!("{} {}, {}", op, rd, rm) } &Inst::FpuRRRR { fpu_op, rd, rn, rm } => { @@ -2521,30 +1770,30 @@ impl Inst { FPUOp3::MSub32 => "msebr", FPUOp3::MSub64 => "msdbr", }; - let rd = rd.to_reg().show_rru(mb_rru); - let rn = rn.show_rru(mb_rru); - let rm = rm.show_rru(mb_rru); + let rd = pretty_print_reg(rd.to_reg(), allocs); + let rn = pretty_print_reg(rn, allocs); + let rm = pretty_print_reg(rm, allocs); format!("{} {}, {}, {}", op, rd, rn, rm) } &Inst::FpuCopysign { rd, rn, rm } => { - let rd = rd.to_reg().show_rru(mb_rru); - let rn = rn.show_rru(mb_rru); - let rm = rm.show_rru(mb_rru); + let rd = pretty_print_reg(rd.to_reg(), allocs); + let rn = pretty_print_reg(rn, allocs); + let rm = pretty_print_reg(rm, allocs); format!("cpsdr {}, {}, {}", rd, rm, rn) } &Inst::FpuCmp32 { rn, rm } => { - let rn = rn.show_rru(mb_rru); - let rm = rm.show_rru(mb_rru); + let rn = pretty_print_reg(rn, allocs); + let rm = pretty_print_reg(rm, allocs); format!("cebr {}, {}", rn, rm) } &Inst::FpuCmp64 { rn, rm } => { - let rn = rn.show_rru(mb_rru); - let rm = rm.show_rru(mb_rru); + let rn = pretty_print_reg(rn, allocs); + let rm = pretty_print_reg(rm, allocs); format!("cdbr {}, {}", rn, rm) } &Inst::LoadFpuConst32 { rd, const_data } => { - let rd = rd.to_reg().show_rru(mb_rru); - let tmp = writable_spilltmp_reg().to_reg().show_rru(mb_rru); + let rd = pretty_print_reg(rd.to_reg(), allocs); + let tmp = pretty_print_reg(writable_spilltmp_reg().to_reg(), &mut empty_allocs); format!( "bras {}, 8 ; data.f32 {} ; le {}, 0({})", tmp, @@ -2554,8 +1803,8 @@ impl Inst { ) } &Inst::LoadFpuConst64 { rd, const_data } => { - let rd = rd.to_reg().show_rru(mb_rru); - let tmp = writable_spilltmp_reg().to_reg().show_rru(mb_rru); + let rd = pretty_print_reg(rd.to_reg(), allocs); + let tmp = pretty_print_reg(writable_spilltmp_reg().to_reg(), &mut empty_allocs); format!( "bras {}, 12 ; data.f64 {} ; ld {}, 0({})", tmp, @@ -2575,8 +1824,8 @@ impl Inst { FpuToIntOp::F64ToI64 => "cgdbra", FpuToIntOp::F64ToU64 => "clgdbr", }; - let rd = rd.to_reg().show_rru(mb_rru); - let rn = rn.show_rru(mb_rru); + let rd = pretty_print_reg(rd.to_reg(), allocs); + let rn = pretty_print_reg(rn, allocs); format!("{} {}, 5, {}, 0", op, rd, rn) } &Inst::IntToFpu { op, rd, rn } => { @@ -2590,8 +1839,8 @@ impl Inst { IntToFpuOp::I64ToF64 => "cdgbra", IntToFpuOp::U64ToF64 => "cdlgbr", }; - let rd = rd.to_reg().show_rru(mb_rru); - let rn = rn.show_rru(mb_rru); + let rd = pretty_print_reg(rd.to_reg(), allocs); + let rn = pretty_print_reg(rn, allocs); format!("{} {}, 0, {}, 0", op, rd, rn) } &Inst::FpuRound { op, rd, rn } => { @@ -2605,8 +1854,8 @@ impl Inst { FpuRoundMode::Nearest32 => ("fiebr", 4), FpuRoundMode::Nearest64 => ("fidbr", 4), }; - let rd = rd.to_reg().show_rru(mb_rru); - let rn = rn.show_rru(mb_rru); + let rd = pretty_print_reg(rd.to_reg(), allocs); + let rn = pretty_print_reg(rn, allocs); format!("{} {}, {}, {}", op, rd, rn, m3) } &Inst::FpuVecRRR { fpu_op, rd, rn, rm } => { @@ -2617,9 +1866,9 @@ impl Inst { FPUOp2::Min64 => "wfmindb", _ => unimplemented!(), }; - let rd = rd.to_reg().show_rru(mb_rru); - let rn = rn.show_rru(mb_rru); - let rm = 
rm.show_rru(mb_rru); + let rd = pretty_print_reg(rd.to_reg(), allocs); + let rn = pretty_print_reg(rn, allocs); + let rm = pretty_print_reg(rm, allocs); format!("{} {}, {}, {}, 1", op, rd, rn, rm) } &Inst::Extend { @@ -2629,8 +1878,8 @@ impl Inst { from_bits, to_bits, } => { - let rd = rd.to_reg().show_rru(mb_rru); - let rn = rn.show_rru(mb_rru); + let rd = pretty_print_reg(rd.to_reg(), allocs); + let rn = pretty_print_reg(rn, allocs); let op = match (signed, from_bits, to_bits) { (_, 1, 32) => "llcr", (_, 1, 64) => "llgcr", @@ -2649,16 +1898,16 @@ impl Inst { format!("{} {}, {}", op, rd, rn) } &Inst::Call { link, ref info, .. } => { - let link = link.show_rru(mb_rru); + let link = pretty_print_reg(link.to_reg(), allocs); format!("brasl {}, {}", link, info.dest) } &Inst::CallInd { link, ref info, .. } => { - let link = link.show_rru(mb_rru); - let rn = info.rn.show_rru(mb_rru); + let link = pretty_print_reg(link.to_reg(), allocs); + let rn = pretty_print_reg(info.rn, allocs); format!("basr {}, {}", link, rn) } - &Inst::Ret { link } => { - let link = link.show_rru(mb_rru); + &Inst::Ret { link, .. } => { + let link = pretty_print_reg(link, allocs); format!("br {}", link) } &Inst::EpiloguePlaceholder => "epilogue placeholder".to_string(), @@ -2667,7 +1916,7 @@ impl Inst { format!("jg {}", dest) } &Inst::IndirectBr { rn, .. } => { - let rn = rn.show_rru(mb_rru); + let rn = pretty_print_reg(rn, allocs); format!("br {}", rn) } &Inst::CondBr { @@ -2677,23 +1926,23 @@ impl Inst { } => { let taken = taken.to_string(); let not_taken = not_taken.to_string(); - let cond = cond.show_rru(mb_rru); + let cond = cond.pretty_print_default(); format!("jg{} {} ; jg {}", cond, taken, not_taken) } &Inst::OneWayCondBr { target, cond } => { let target = target.to_string(); - let cond = cond.show_rru(mb_rru); + let cond = cond.pretty_print_default(); format!("jg{} {}", cond, target) } &Inst::Debugtrap => "debugtrap".to_string(), &Inst::Trap { .. } => "trap".to_string(), &Inst::TrapIf { cond, .. } => { - let cond = cond.invert().show_rru(mb_rru); + let cond = cond.invert().pretty_print_default(); format!("j{} 6 ; trap", cond) } &Inst::JTSequence { ridx, ref targets } => { - let ridx = ridx.show_rru(mb_rru); - let rtmp = writable_spilltmp_reg().to_reg().show_rru(mb_rru); + let ridx = pretty_print_reg(ridx, allocs); + let rtmp = pretty_print_reg(writable_spilltmp_reg().to_reg(), &mut empty_allocs); // The first entry is the default target, which is not emitted // into the jump table, so we skip it here. It is only in the // list so MachTerminator will see the potential target. @@ -2717,50 +1966,51 @@ impl Inst { ref name, offset, } => { - let rd = rd.show_rru(mb_rru); - let tmp = writable_spilltmp_reg().to_reg().show_rru(mb_rru); + let rd = pretty_print_reg(rd.to_reg(), allocs); + let tmp = pretty_print_reg(writable_spilltmp_reg().to_reg(), &mut empty_allocs); format!( "bras {}, 12 ; data {} + {} ; lg {}, 0({})", tmp, name, offset, rd, tmp ) } &Inst::LoadAddr { rd, ref mem } => { - let (mem_str, mem) = - mem_finalize_for_show(mem, mb_rru, state, true, true, true, true); - + let rd = pretty_print_reg(rd.to_reg(), allocs); + let mem = mem.with_allocs(allocs); + let (mem_str, mem) = mem_finalize_for_show(&mem, state, true, true, true, true); let op = match &mem { &MemArg::BXD12 { .. } => "la", &MemArg::BXD20 { .. } => "lay", &MemArg::Label { .. } | &MemArg::Symbol { .. 
} => "larl", _ => unreachable!(), }; - let rd = rd.show_rru(mb_rru); - let mem = mem.show_rru(mb_rru); + let mem = mem.pretty_print_default(); + format!("{}{} {}, {}", mem_str, op, rd, mem) } &Inst::Loop { ref body, cond } => { let body = body .into_iter() - .map(|inst| inst.show_rru(mb_rru)) + .map(|inst| inst.print_with_state(state, allocs)) .collect::<Vec<_>>() .join(" ; "); - let cond = cond.show_rru(mb_rru); + let cond = cond.pretty_print_default(); format!("0: {} ; jg{} 0b ; 1:", body, cond) } &Inst::CondBreak { cond } => { - let cond = cond.show_rru(mb_rru); + let cond = cond.pretty_print_default(); format!("jg{} 1f", cond) } &Inst::VirtualSPOffsetAdj { offset } => { state.virtual_sp_offset += offset; format!("virtual_sp_offset_adjust {}", offset) } - &Inst::ValueLabelMarker { label, reg } => { - format!("value_label {:?}, {}", label, reg.show_rru(mb_rru)) - } &Inst::Unwind { ref inst } => { format!("unwind {:?}", inst) } + &Inst::DummyUse { reg } => { + let reg = pretty_print_reg(reg, allocs); + format!("dummy_use {}", reg) + } } } } diff --git a/cranelift/codegen/src/isa/s390x/inst/regs.rs b/cranelift/codegen/src/isa/s390x/inst/regs.rs index 3ebba43bda..179e8bdc9f 100644 --- a/cranelift/codegen/src/isa/s390x/inst/regs.rs +++ b/cranelift/codegen/src/isa/s390x/inst/regs.rs @@ -1,7 +1,12 @@ //! S390x ISA definitions: registers. +use alloc::string::String; +use regalloc2::MachineEnv; +use regalloc2::PReg; +use regalloc2::VReg; + +use crate::machinst::*; use crate::settings; -use regalloc::{RealRegUniverse, Reg, RegClass, RegClassInfo, Writable, NUM_REG_CLASSES}; //============================================================================= // Registers, the Universe thereof, and printing @@ -29,11 +34,8 @@ const FPR_INDICES: [u8; 16] = [ /// Get a reference to a GPR (integer register). pub fn gpr(num: u8) -> Reg { assert!(num < 16); - Reg::new_real( - RegClass::I64, - /* enc = */ num, - /* index = */ GPR_INDICES[num as usize], - ) + let preg = PReg::new(num as usize, RegClass::Int); + Reg::from(VReg::new(preg.index(), RegClass::Int)) } /// Get a writable reference to a GPR. @@ -44,11 +46,8 @@ pub fn writable_gpr(num: u8) -> Writable<Reg> { /// Get a reference to a FPR (floating-point register). pub fn fpr(num: u8) -> Reg { assert!(num < 16); - Reg::new_real( - RegClass::F64, - /* enc = */ num, - /* index = */ FPR_INDICES[num as usize], - ) + let preg = PReg::new(num as usize, RegClass::Float); + Reg::from(VReg::new(preg.index(), RegClass::Float)) } /// Get a writable reference to a V-register. @@ -88,81 +87,73 @@ pub fn zero_reg() -> Reg { } /// Create the register universe for AArch64. -pub fn create_reg_universe(_flags: &settings::Flags) -> RealRegUniverse { - let mut regs = vec![]; - let mut allocable_by_class = [None; NUM_REG_CLASSES]; - - // Numbering Scheme: we put FPRs first, then GPRs. The GPRs exclude several registers: - // r0 (we cannot use this for addressing // FIXME regalloc) - // r1 (spilltmp) - // r15 (stack pointer) - - // FPRs.
- let mut base = regs.len(); - regs.push((fpr(0).to_real_reg(), "%f0".into())); - regs.push((fpr(2).to_real_reg(), "%f2".into())); - regs.push((fpr(4).to_real_reg(), "%f4".into())); - regs.push((fpr(6).to_real_reg(), "%f6".into())); - regs.push((fpr(1).to_real_reg(), "%f1".into())); - regs.push((fpr(3).to_real_reg(), "%f3".into())); - regs.push((fpr(5).to_real_reg(), "%f5".into())); - regs.push((fpr(7).to_real_reg(), "%f7".into())); - regs.push((fpr(8).to_real_reg(), "%f8".into())); - regs.push((fpr(10).to_real_reg(), "%f10".into())); - regs.push((fpr(12).to_real_reg(), "%f12".into())); - regs.push((fpr(14).to_real_reg(), "%f14".into())); - regs.push((fpr(9).to_real_reg(), "%f9".into())); - regs.push((fpr(11).to_real_reg(), "%f11".into())); - regs.push((fpr(13).to_real_reg(), "%f13".into())); - regs.push((fpr(15).to_real_reg(), "%f15".into())); - - allocable_by_class[RegClass::F64.rc_to_usize()] = Some(RegClassInfo { - first: base, - last: regs.len() - 1, - suggested_scratch: Some(fpr(1).get_index()), - }); - - // Caller-saved GPRs in the SystemV s390x ABI. - base = regs.len(); - regs.push((gpr(2).to_real_reg(), "%r2".into())); - regs.push((gpr(3).to_real_reg(), "%r3".into())); - regs.push((gpr(4).to_real_reg(), "%r4".into())); - regs.push((gpr(5).to_real_reg(), "%r5".into())); - - // Callee-saved GPRs in the SystemV s390x ABI. - // We start from r14 downwards in an attempt to allow the - // prolog to use as short a STMG as possible. - regs.push((gpr(14).to_real_reg(), "%r14".into())); - regs.push((gpr(13).to_real_reg(), "%r13".into())); - regs.push((gpr(12).to_real_reg(), "%r12".into())); - regs.push((gpr(11).to_real_reg(), "%r11".into())); - regs.push((gpr(10).to_real_reg(), "%r10".into())); - regs.push((gpr(9).to_real_reg(), "%r9".into())); - regs.push((gpr(8).to_real_reg(), "%r8".into())); - regs.push((gpr(7).to_real_reg(), "%r7".into())); - regs.push((gpr(6).to_real_reg(), "%r6".into())); - - allocable_by_class[RegClass::I64.rc_to_usize()] = Some(RegClassInfo { - first: base, - last: regs.len() - 1, - suggested_scratch: Some(gpr(13).get_index()), - }); - - // Other regs, not available to the allocator. - let allocable = regs.len(); - regs.push((gpr(15).to_real_reg(), "%r15".into())); - regs.push((gpr(0).to_real_reg(), "%r0".into())); - regs.push((gpr(1).to_real_reg(), "%r1".into())); - - // Assert sanity: the indices in the register structs must match their - // actual indices in the array. - for (i, reg) in regs.iter().enumerate() { - assert_eq!(i, reg.0.get_index()); +pub fn create_machine_env(_flags: &settings::Flags) -> MachineEnv { + fn preg(r: Reg) -> PReg { + r.to_real_reg().unwrap().into() } - RealRegUniverse { - regs, - allocable, - allocable_by_class, + MachineEnv { + preferred_regs_by_class: [ + vec![ + // no r0; can't use for addressing? + // no r1; it is our spilltmp. + preg(gpr(2)), + preg(gpr(3)), + preg(gpr(4)), + preg(gpr(5)), + ], + vec![ + preg(fpr(0)), + preg(fpr(1)), + preg(fpr(2)), + preg(fpr(3)), + preg(fpr(4)), + preg(fpr(5)), + preg(fpr(6)), + preg(fpr(7)), + ], + ], + non_preferred_regs_by_class: [ + vec![ + preg(gpr(6)), + preg(gpr(7)), + preg(gpr(8)), + preg(gpr(9)), + preg(gpr(10)), + preg(gpr(11)), + preg(gpr(12)), + // no r13; it is our scratch reg. + preg(gpr(14)), + // no r15; it is the stack pointer. + ], + vec![ + preg(fpr(8)), + preg(fpr(9)), + preg(fpr(10)), + preg(fpr(11)), + preg(fpr(12)), + preg(fpr(13)), + preg(fpr(14)), + // no f15; it is our scratch reg. 
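                // Editorial sketch (not part of this patch): regalloc2 draws from
                // `preferred_regs_by_class` first (the caller-saved r2-r5 and f0-f7
                // above, which cost nothing in the prologue), falls back to these
                // callee-saved `non_preferred` registers only under pressure, and
                // reserves `scratch_by_class` for its own move resolution. A caller
                // is assumed to hand this env to the allocator roughly as:
                //
                //     let env = create_machine_env(&flags);
                //     let output = regalloc2::run(&vcode, &env)?;
                //
                // where `vcode` implements `regalloc2::Function`; the exact `run`
                // signature is assumed from the regalloc2 0.1 API.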
+ ], + ], + scratch_by_class: [preg(gpr(13)), preg(fpr(15))], + fixed_stack_slots: vec![], } } + +pub fn show_reg(reg: Reg) -> String { + if let Some(rreg) = reg.to_real_reg() { + match rreg.class() { + RegClass::Int => format!("%r{}", rreg.hw_enc()), + RegClass::Float => format!("%f{}", rreg.hw_enc()), + } + } else { + format!("%{:?}", reg) + } +} + +pub fn pretty_print_reg(reg: Reg, allocs: &mut AllocationConsumer<'_>) -> String { + let reg = allocs.next(reg); + show_reg(reg) +} diff --git a/cranelift/codegen/src/isa/s390x/inst/unwind/systemv.rs b/cranelift/codegen/src/isa/s390x/inst/unwind/systemv.rs index a970c59c69..8abf6c84a2 100644 --- a/cranelift/codegen/src/isa/s390x/inst/unwind/systemv.rs +++ b/cranelift/codegen/src/isa/s390x/inst/unwind/systemv.rs @@ -1,8 +1,8 @@ //! Unwind information for System V ABI (s390x). use crate::isa::unwind::systemv::RegisterMappingError; +use crate::machinst::{Reg, RegClass}; use gimli::{write::CommonInformationEntry, Encoding, Format, Register}; -use regalloc::{Reg, RegClass}; /// Creates a new s390x common information entry (CIE). pub fn create_cie() -> CommonInformationEntry { @@ -64,10 +64,9 @@ pub fn map_reg(reg: Reg) -> Result { Register(31), ]; - match reg.get_class() { - RegClass::I64 => Ok(GPR_MAP[reg.get_hw_encoding() as usize]), - RegClass::F64 => Ok(FPR_MAP[reg.get_hw_encoding() as usize]), - _ => Err(RegisterMappingError::UnsupportedRegisterBank("class?")), + match reg.class() { + RegClass::Int => Ok(GPR_MAP[reg.to_real_reg().unwrap().hw_enc() as usize]), + RegClass::Float => Ok(FPR_MAP[reg.to_real_reg().unwrap().hw_enc() as usize]), } } diff --git a/cranelift/codegen/src/isa/s390x/lower.isle b/cranelift/codegen/src/isa/s390x/lower.isle index d60f1e4e63..54ced09644 100644 --- a/cranelift/codegen/src/isa/s390x/lower.isle +++ b/cranelift/codegen/src/isa/s390x/lower.isle @@ -2137,31 +2137,31 @@ ;;;; Rules for `trap` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; (rule (lower (trap trap_code)) - (safepoint (trap_impl trap_code))) + (side_effect (trap_impl trap_code))) ;;;; Rules for `resumable_trap` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; (rule (lower (resumable_trap trap_code)) - (safepoint (trap_impl trap_code))) + (side_effect (trap_impl trap_code))) ;;;; Rules for `trapz` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; (rule (lower (trapz val trap_code)) - (safepoint (trap_if_bool (invert_bool (value_nonzero val)) trap_code))) + (side_effect (trap_if_bool (invert_bool (value_nonzero val)) trap_code))) ;;;; Rules for `trapnz` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; (rule (lower (trapnz val trap_code)) - (safepoint (trap_if_bool (value_nonzero val) trap_code))) + (side_effect (trap_if_bool (value_nonzero val) trap_code))) ;;;; Rules for `resumable_trapnz` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; (rule (lower (resumable_trapnz val trap_code)) - (safepoint (trap_if_bool (value_nonzero val) trap_code))) + (side_effect (trap_if_bool (value_nonzero val) trap_code))) ;;;; Rules for `debugtrap` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; @@ -2179,7 +2179,7 @@ ;; the desired comparison here; there is no separate `ifcmp` lowering. (rule (lower (trapif int_cc (ifcmp x y) trap_code)) - (safepoint (trap_if_bool (icmp_val $false int_cc x y) trap_code))) + (side_effect (trap_if_bool (icmp_val $false int_cc x y) trap_code))) ;; Recognize the case of `iadd_ifcout` feeding into `trapif`. 
Note that ;; in the case, the `iadd_ifcout` is generated by a separate lowering diff --git a/cranelift/codegen/src/isa/s390x/lower.rs b/cranelift/codegen/src/isa/s390x/lower.rs index 6979e884fa..39c7531392 100644 --- a/cranelift/codegen/src/isa/s390x/lower.rs +++ b/cranelift/codegen/src/isa/s390x/lower.rs @@ -11,7 +11,6 @@ use crate::machinst::lower::*; use crate::machinst::*; use crate::settings::Flags; use crate::CodegenResult; -use regalloc::Reg; use smallvec::SmallVec; pub mod isle; diff --git a/cranelift/codegen/src/isa/s390x/lower/isle.rs b/cranelift/codegen/src/isa/s390x/lower/isle.rs index 0d7d1fecfa..a9424d88c9 100644 --- a/cranelift/codegen/src/isa/s390x/lower/isle.rs +++ b/cranelift/codegen/src/isa/s390x/lower/isle.rs @@ -14,9 +14,8 @@ use crate::settings::Flags; use crate::{ ir::{ condcodes::*, immediates::*, types::*, AtomicRmwOp, Endianness, Inst, InstructionData, - StackSlot, TrapCode, Value, ValueLabel, ValueList, + StackSlot, TrapCode, Value, ValueList, }, - isa::s390x::inst::s390x_map_regs, isa::unwind::UnwindInst, machinst::{InsnOutput, LowerCtx}, }; @@ -43,15 +42,9 @@ pub(crate) fn lower( where C: LowerCtx, { - lower_common( - lower_ctx, - flags, - isa_flags, - outputs, - inst, - |cx, insn| generated_code::constructor_lower(cx, insn), - s390x_map_regs, - ) + lower_common(lower_ctx, flags, isa_flags, outputs, inst, |cx, insn| { + generated_code::constructor_lower(cx, insn) + }) } /// The main entry point for branch lowering with ISLE. @@ -65,15 +58,9 @@ pub(crate) fn lower_branch( where C: LowerCtx, { - lower_common( - lower_ctx, - flags, - isa_flags, - &[], - branch, - |cx, insn| generated_code::constructor_lower_branch(cx, insn, &targets.to_vec()), - s390x_map_regs, - ) + lower_common(lower_ctx, flags, isa_flags, &[], branch, |cx, insn| { + generated_code::constructor_lower_branch(cx, insn, &targets.to_vec()) + }) } impl generated_code::Context for IsleContext<'_, C, Flags, IsaFlags, 6> @@ -523,11 +510,6 @@ where #[inline] fn emit(&mut self, inst: &MInst) -> Unit { - self.emitted_insts.push((inst.clone(), false)); - } - - #[inline] - fn emit_safepoint(&mut self, inst: &MInst) -> Unit { - self.emitted_insts.push((inst.clone(), true)); + self.lower_ctx.emit(inst.clone()); } } diff --git a/cranelift/codegen/src/isa/s390x/lower/isle/generated_code.manifest b/cranelift/codegen/src/isa/s390x/lower/isle/generated_code.manifest index b2391d6887..aae4194934 100644 --- a/cranelift/codegen/src/isa/s390x/lower/isle/generated_code.manifest +++ b/cranelift/codegen/src/isa/s390x/lower/isle/generated_code.manifest @@ -1,4 +1,4 @@ src/clif.isle 443b34b797fc8ace -src/prelude.isle c0751050a11e2686 -src/isa/s390x/inst.isle d91a16074ab186a8 -src/isa/s390x/lower.isle 1cc5a12adc8c75f9 +src/prelude.isle afd037c4d91c875c +src/isa/s390x/inst.isle 8218bd9e8556446b +src/isa/s390x/lower.isle 6a8de81f8dc4e568 diff --git a/cranelift/codegen/src/isa/s390x/lower/isle/generated_code.rs b/cranelift/codegen/src/isa/s390x/lower/isle/generated_code.rs index 1997c856df..5a9d17ab09 100644 --- a/cranelift/codegen/src/isa/s390x/lower/isle/generated_code.rs +++ b/cranelift/codegen/src/isa/s390x/lower/isle/generated_code.rs @@ -79,7 +79,6 @@ pub trait Context { fn def_inst(&mut self, arg0: Value) -> Option; fn offset32_to_u32(&mut self, arg0: Offset32) -> u32; fn emit(&mut self, arg0: &MInst) -> Unit; - fn emit_safepoint(&mut self, arg0: &MInst) -> Unit; fn trap_code_division_by_zero(&mut self) -> TrapCode; fn trap_code_integer_overflow(&mut self) -> TrapCode; fn trap_code_bad_conversion_to_integer(&mut self) -> 
TrapCode; @@ -155,13 +154,13 @@ pub trait Context { fn same_reg(&mut self, arg0: Reg, arg1: WritableReg) -> Option<()>; } -/// Internal type SideEffectNoResult: defined at src/prelude.isle line 405. +/// Internal type SideEffectNoResult: defined at src/prelude.isle line 402. #[derive(Clone, Debug)] pub enum SideEffectNoResult { Inst { inst: MInst }, } -/// Internal type ProducesFlags: defined at src/prelude.isle line 427. +/// Internal type ProducesFlags: defined at src/prelude.isle line 418. #[derive(Clone, Debug)] pub enum ProducesFlags { ProducesFlagsSideEffect { inst: MInst }, @@ -169,7 +168,7 @@ pub enum ProducesFlags { ProducesFlagsReturnsResultWithConsumer { inst: MInst, result: Reg }, } -/// Internal type ConsumesFlags: defined at src/prelude.isle line 438. +/// Internal type ConsumesFlags: defined at src/prelude.isle line 429. #[derive(Clone, Debug)] pub enum ConsumesFlags { ConsumesFlagsReturnsResultWithProducer { @@ -666,6 +665,7 @@ pub enum MInst { }, Ret { link: Reg, + rets: VecReg, }, EpiloguePlaceholder, Jump { @@ -715,9 +715,8 @@ pub enum MInst { VirtualSPOffsetAdj { offset: i64, }, - ValueLabelMarker { + DummyUse { reg: Reg, - label: ValueLabel, }, Unwind { inst: UnwindInst, @@ -959,7 +958,7 @@ pub fn constructor_side_effect( inst: ref pattern1_0, } = pattern0_0 { - // Rule at src/prelude.isle line 410. + // Rule at src/prelude.isle line 407. let expr0_0 = C::emit(ctx, pattern1_0); let expr1_0 = C::output_none(ctx); return Some(expr1_0); @@ -967,24 +966,6 @@ pub fn constructor_side_effect( return None; } -// Generated as internal constructor for term safepoint. -pub fn constructor_safepoint( - ctx: &mut C, - arg0: &SideEffectNoResult, -) -> Option { - let pattern0_0 = arg0; - if let &SideEffectNoResult::Inst { - inst: ref pattern1_0, - } = pattern0_0 - { - // Rule at src/prelude.isle line 416. - let expr0_0 = C::emit_safepoint(ctx, pattern1_0); - let expr1_0 = C::output_none(ctx); - return Some(expr1_0); - } - return None; -} - // Generated as internal constructor for term produces_flags_get_reg. pub fn constructor_produces_flags_get_reg( ctx: &mut C, @@ -996,7 +977,7 @@ pub fn constructor_produces_flags_get_reg( result: pattern1_1, } = pattern0_0 { - // Rule at src/prelude.isle line 454. + // Rule at src/prelude.isle line 445. return Some(pattern1_1); } return None; @@ -1013,7 +994,7 @@ pub fn constructor_produces_flags_ignore( inst: ref pattern1_0, result: pattern1_1, } => { - // Rule at src/prelude.isle line 459. + // Rule at src/prelude.isle line 450. let expr0_0 = ProducesFlags::ProducesFlagsSideEffect { inst: pattern1_0.clone(), }; @@ -1023,7 +1004,7 @@ pub fn constructor_produces_flags_ignore( inst: ref pattern1_0, result: pattern1_1, } => { - // Rule at src/prelude.isle line 461. + // Rule at src/prelude.isle line 452. let expr0_0 = ProducesFlags::ProducesFlagsSideEffect { inst: pattern1_0.clone(), }; @@ -1052,7 +1033,7 @@ pub fn constructor_consumes_flags_concat( result: pattern3_1, } = pattern2_0 { - // Rule at src/prelude.isle line 468. + // Rule at src/prelude.isle line 459. let expr0_0 = C::value_regs(ctx, pattern1_1, pattern3_1); let expr1_0 = ConsumesFlags::ConsumesFlagsTwiceReturnsValueRegs { inst1: pattern1_0.clone(), @@ -1082,7 +1063,7 @@ pub fn constructor_with_flags( inst: ref pattern3_0, result: pattern3_1, } => { - // Rule at src/prelude.isle line 493. + // Rule at src/prelude.isle line 484. 
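
The `Ret` variant growing a `rets` field here (and the x64 `gen_ret(rets)` change further down) is a direct consequence of moving to explicit operands: regalloc2 has no built-in notion of registers that are implicitly live-out of a function, so return values must appear as uses on the return instruction itself. A hedged sketch of what the operand collection for such a `Ret` could look like (`reg_use` and the collector types are from this PR; pinning each value to its ABI register is elided):

    use crate::machinst::{OperandCollector, Reg};
    use regalloc2::VReg;

    fn ret_operands<F: Fn(VReg) -> VReg>(
        rets: &[Reg],
        collector: &mut OperandCollector<'_, F>,
    ) {
        for &r in rets {
            // Keep each return value live up to (and at) the ret itself.
            // The real backend additionally constrains each one to its
            // ABI-assigned register.
            collector.reg_use(r);
        }
    }
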
let expr0_0 = C::emit(ctx, pattern1_0); let expr1_0 = C::emit(ctx, pattern3_0); let expr2_0 = C::value_reg(ctx, pattern3_1); @@ -1093,7 +1074,7 @@ pub fn constructor_with_flags( inst2: ref pattern3_1, result: pattern3_2, } => { - // Rule at src/prelude.isle line 499. + // Rule at src/prelude.isle line 490. let expr0_0 = C::emit(ctx, pattern1_0); let expr1_0 = C::emit(ctx, pattern3_0); let expr2_0 = C::emit(ctx, pattern3_1); @@ -1106,7 +1087,7 @@ pub fn constructor_with_flags( inst4: ref pattern3_3, result: pattern3_4, } => { - // Rule at src/prelude.isle line 511. + // Rule at src/prelude.isle line 502. let expr0_0 = C::emit(ctx, pattern1_0); let expr1_0 = C::emit(ctx, pattern3_0); let expr2_0 = C::emit(ctx, pattern3_1); @@ -1127,7 +1108,7 @@ pub fn constructor_with_flags( result: pattern3_1, } = pattern2_0 { - // Rule at src/prelude.isle line 487. + // Rule at src/prelude.isle line 478. let expr0_0 = C::emit(ctx, pattern1_0); let expr1_0 = C::emit(ctx, pattern3_0); let expr2_0 = C::value_regs(ctx, pattern1_1, pattern3_1); @@ -1147,7 +1128,7 @@ pub fn constructor_with_flags_reg( ) -> Option { let pattern0_0 = arg0; let pattern1_0 = arg1; - // Rule at src/prelude.isle line 528. + // Rule at src/prelude.isle line 519. let expr0_0 = constructor_with_flags(ctx, pattern0_0, pattern1_0)?; let expr1_0: usize = 0; let expr2_0 = C::value_regs_get(ctx, expr0_0, expr1_0); @@ -8098,13 +8079,13 @@ pub fn constructor_lower(ctx: &mut C, arg0: Inst) -> Option { // Rule at src/isa/s390x/lower.isle line 2139. let expr0_0 = constructor_trap_impl(ctx, pattern2_1)?; - let expr1_0 = constructor_safepoint(ctx, &expr0_0)?; + let expr1_0 = constructor_side_effect(ctx, &expr0_0)?; return Some(expr1_0); } &Opcode::ResumableTrap => { // Rule at src/isa/s390x/lower.isle line 2145. let expr0_0 = constructor_trap_impl(ctx, pattern2_1)?; - let expr1_0 = constructor_safepoint(ctx, &expr0_0)?; + let expr1_0 = constructor_side_effect(ctx, &expr0_0)?; return Some(expr1_0); } _ => {} @@ -8358,7 +8339,7 @@ pub fn constructor_lower(ctx: &mut C, arg0: Inst) -> Option { @@ -8391,21 +8372,21 @@ pub fn constructor_lower(ctx: &mut C, arg0: Inst) -> Option { // Rule at src/isa/s390x/lower.isle line 2157. let expr0_0 = constructor_value_nonzero(ctx, pattern2_1)?; let expr1_0 = constructor_trap_if_bool(ctx, &expr0_0, pattern2_2)?; - let expr2_0 = constructor_safepoint(ctx, &expr1_0)?; + let expr2_0 = constructor_side_effect(ctx, &expr1_0)?; return Some(expr2_0); } &Opcode::ResumableTrapnz => { // Rule at src/isa/s390x/lower.isle line 2163. 
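
These `safepoint` → `side_effect` rewrites (and the deleted `constructor_safepoint` above) reflect a real change in division of labor: regalloc.rs needed lowering to tag safepoint instructions, via `emit_safepoint`, so it knew where to generate stackmaps, whereas regalloc2 derives safepoints itself from reference-typed vregs. The net effect on a trap lowering, as a sketch (`Option<InstOutput>` is an assumption for the generated constructors' result type, since the generated signatures lost their type parameters here):

    fn lower_trap<C: Context>(ctx: &mut C, trap_code: TrapCode) -> Option<InstOutput> {
        // Same shape as the generated rules above; no safepoint marker is
        // needed, because regalloc2 computes safepoints on its own.
        let trap = constructor_trap_impl(ctx, trap_code)?;
        constructor_side_effect(ctx, &trap)
    }
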
let expr0_0 = constructor_value_nonzero(ctx, pattern2_1)?; let expr1_0 = constructor_trap_if_bool(ctx, &expr0_0, pattern2_2)?; - let expr2_0 = constructor_safepoint(ctx, &expr1_0)?; + let expr2_0 = constructor_side_effect(ctx, &expr1_0)?; return Some(expr2_0); } _ => {} diff --git a/cranelift/codegen/src/isa/s390x/mod.rs b/cranelift/codegen/src/isa/s390x/mod.rs index e9325c6341..3e9674d6f2 100644 --- a/cranelift/codegen/src/isa/s390x/mod.rs +++ b/cranelift/codegen/src/isa/s390x/mod.rs @@ -7,15 +7,13 @@ use crate::isa::s390x::settings as s390x_settings; use crate::isa::unwind::systemv::RegisterMappingError; use crate::isa::{Builder as IsaBuilder, TargetIsa}; use crate::machinst::{ - compile, MachCompileResult, MachTextSectionBuilder, TextSectionBuilder, VCode, + compile, MachCompileResult, MachTextSectionBuilder, Reg, TextSectionBuilder, VCode, }; use crate::result::CodegenResult; use crate::settings as shared_settings; - use alloc::{boxed::Box, vec::Vec}; use core::fmt; - -use regalloc::{PrettyPrint, RealRegUniverse, Reg}; +use regalloc2::MachineEnv; use target_lexicon::{Architecture, Triple}; // New backend: @@ -24,7 +22,7 @@ pub(crate) mod inst; mod lower; mod settings; -use inst::create_reg_universe; +use inst::create_machine_env; use self::inst::EmitInfo; @@ -33,7 +31,7 @@ pub struct S390xBackend { triple: Triple, flags: shared_settings::Flags, isa_flags: s390x_settings::Flags, - reg_universe: RealRegUniverse, + machine_env: MachineEnv, } impl S390xBackend { @@ -43,12 +41,12 @@ impl S390xBackend { flags: shared_settings::Flags, isa_flags: s390x_settings::Flags, ) -> S390xBackend { - let reg_universe = create_reg_universe(&flags); + let machine_env = create_machine_env(&flags); S390xBackend { triple, flags, isa_flags, - reg_universe, + machine_env, } } @@ -58,10 +56,10 @@ impl S390xBackend { &self, func: &Function, flags: shared_settings::Flags, - ) -> CodegenResult> { + ) -> CodegenResult<(VCode, regalloc2::Output)> { let emit_info = EmitInfo::new(flags.clone(), self.isa_flags.clone()); let abi = Box::new(abi::S390xABICallee::new(func, flags, self.isa_flags())?); - compile::compile::(func, self, abi, &self.reg_universe, emit_info) + compile::compile::(func, self, abi, &self.machine_env, emit_info) } } @@ -72,28 +70,27 @@ impl TargetIsa for S390xBackend { want_disasm: bool, ) -> CodegenResult { let flags = self.flags(); - let vcode = self.compile_vcode(func, flags.clone())?; - let (buffer, bb_starts, bb_edges) = vcode.emit(); - let frame_size = vcode.frame_size(); - let value_labels_ranges = vcode.value_labels_ranges(); - let stackslot_offsets = vcode.stackslot_offsets().clone(); + let (vcode, regalloc_result) = self.compile_vcode(func, flags.clone())?; - let disasm = if want_disasm { - Some(vcode.show_rru(Some(&create_reg_universe(flags)))) - } else { - None - }; + let want_disasm = want_disasm || log::log_enabled!(log::Level::Debug); + let emit_result = vcode.emit(®alloc_result, want_disasm, flags.machine_code_cfg_info()); + let frame_size = emit_result.frame_size; + let value_labels_ranges = emit_result.value_labels_ranges; + let buffer = emit_result.buffer.finish(); + let stackslot_offsets = emit_result.stackslot_offsets; - let buffer = buffer.finish(); + if let Some(disasm) = emit_result.disasm.as_ref() { + log::debug!("disassembly:\n{}", disasm); + } Ok(MachCompileResult { buffer, frame_size, - disasm, + disasm: emit_result.disasm, value_labels_ranges, stackslot_offsets, - bb_starts, - bb_edges, + bb_starts: emit_result.bb_offsets, + bb_edges: emit_result.bb_edges, }) } @@ -296,10 
+293,11 @@ mod test { // jg label3 // ahi %r2, -4660 // br %r14 + let golden = vec![ - 167, 42, 18, 52, 167, 46, 0, 0, 192, 100, 0, 0, 0, 11, 236, 50, 18, 52, 0, 216, 167, - 62, 0, 0, 192, 100, 255, 255, 255, 251, 167, 46, 0, 0, 192, 100, 255, 255, 255, 246, - 167, 42, 237, 204, 7, 254, + 236, 50, 18, 52, 0, 216, 167, 62, 0, 0, 192, 100, 0, 0, 0, 11, 236, 67, 18, 52, 0, 216, + 167, 78, 0, 0, 192, 100, 255, 255, 255, 251, 167, 62, 0, 0, 192, 100, 255, 255, 255, + 246, 236, 35, 237, 204, 0, 216, 7, 254, ]; assert_eq!(code, &golden[..]); diff --git a/cranelift/codegen/src/isa/unwind.rs b/cranelift/codegen/src/isa/unwind.rs index 4dd8ae78dd..bf4561e840 100644 --- a/cranelift/codegen/src/isa/unwind.rs +++ b/cranelift/codegen/src/isa/unwind.rs @@ -1,6 +1,6 @@ //! Represents information relating to function unwinding. -use regalloc::RealReg; +use crate::machinst::RealReg; #[cfg(feature = "enable-serde")] use serde::{Deserialize, Serialize}; diff --git a/cranelift/codegen/src/isa/unwind/systemv.rs b/cranelift/codegen/src/isa/unwind/systemv.rs index b914f13a75..e2c2a381a3 100644 --- a/cranelift/codegen/src/isa/unwind/systemv.rs +++ b/cranelift/codegen/src/isa/unwind/systemv.rs @@ -1,8 +1,9 @@ //! System V ABI unwind information. -use crate::binemit::CodeOffset; use crate::isa::unwind::UnwindInst; -use crate::result::{CodegenError, CodegenResult}; +use crate::machinst::Reg; +use crate::result::CodegenResult; +use crate::{binemit::CodeOffset, CodegenError}; use alloc::vec::Vec; use gimli::write::{Address, FrameDescriptionEntry}; @@ -160,7 +161,7 @@ pub struct UnwindInfo { len: u32, } -pub(crate) fn create_unwind_info_from_insts>( +pub(crate) fn create_unwind_info_from_insts>( insts: &[(CodeOffset, UnwindInst)], code_len: usize, mr: &MR, @@ -237,7 +238,7 @@ pub(crate) fn create_unwind_info_from_insts>( reg, } => { let reg = mr - .map(reg.to_reg()) + .map(reg.into()) .map_err(|e| CodegenError::RegisterMappingError(e))?; let off = (clobber_offset as i32) - (clobber_offset_to_cfa as i32); instructions.push((instruction_offset, CallFrameInstruction::Offset(reg, off))); diff --git a/cranelift/codegen/src/isa/unwind/winx64.rs b/cranelift/codegen/src/isa/unwind/winx64.rs index 68cb38b758..349b94cfe6 100644 --- a/cranelift/codegen/src/isa/unwind/winx64.rs +++ b/cranelift/codegen/src/isa/unwind/winx64.rs @@ -262,7 +262,7 @@ impl UnwindInfo { const UNWIND_RBP_REG: u8 = 5; -pub(crate) fn create_unwind_info_from_insts>( +pub(crate) fn create_unwind_info_from_insts>( insts: &[(CodeOffset, UnwindInst)], ) -> CodegenResult { let mut unwind_codes = vec![]; @@ -293,7 +293,7 @@ pub(crate) fn create_unwind_info_from_insts>( &UnwindInst::SaveReg { clobber_offset, reg, - } => match MR::map(reg.to_reg()) { + } => match MR::map(reg.into()) { MappedRegister::Int(reg) => { unwind_codes.push(UnwindCode::SaveReg { instruction_offset, diff --git a/cranelift/codegen/src/isa/x64/abi.rs b/cranelift/codegen/src/isa/x64/abi.rs index 9687ac5f26..620dfec168 100644 --- a/cranelift/codegen/src/isa/x64/abi.rs +++ b/cranelift/codegen/src/isa/x64/abi.rs @@ -11,7 +11,7 @@ use crate::{CodegenError, CodegenResult}; use alloc::boxed::Box; use alloc::vec::Vec; use args::*; -use regalloc::{RealReg, Reg, RegClass, Set, Writable}; +use regalloc2::VReg; use smallvec::{smallvec, SmallVec}; use std::convert::TryFrom; @@ -32,7 +32,7 @@ fn try_fill_baldrdash_reg(call_conv: CallConv, param: &ir::AbiParam) -> Option { // This is SpiderMonkey's `WasmTlsReg`. 
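
On the unwind changes just above: `RegisterMapper` is now parameterized over the machine-independent `Reg`, and the s390x `map_reg` shown earlier indexes per-class DWARF tables by hardware encoding. For GPRs that mapping is the identity; FPRs go through `FPR_MAP` because the s390x DWARF numbering of FPRs is interleaved rather than linear. A GPR-only sketch of the shape (a simplification, not the PR's code):

    use crate::machinst::{Reg, RegClass};
    use gimli::Register;

    // GPR i -> DWARF register i on s390x; virtual regs have no mapping.
    fn map_gpr_dwarf(reg: Reg) -> Option<Register> {
        let rreg = reg.to_real_reg()?;
        match rreg.class() {
            RegClass::Int => Some(Register(rreg.hw_enc() as u16)),
            RegClass::Float => None, // non-linear; use the FPR_MAP lookup
        }
    }
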
Some(ABIArg::reg( - regs::r14().to_real_reg(), + regs::r14().to_real_reg().unwrap(), types::I64, param.extension, param.purpose, @@ -41,7 +41,7 @@ fn try_fill_baldrdash_reg(call_conv: CallConv, param: &ir::AbiParam) -> Option { // This is SpiderMonkey's `WasmTableCallSigReg`. Some(ABIArg::reg( - regs::r10().to_real_reg(), + regs::r10().to_real_reg().unwrap(), types::I64, param.extension, param.purpose, @@ -204,7 +204,7 @@ impl ABIMachineSpec for X64ABIMachineSpec { let mut slots = vec![]; for (rc, reg_ty) in rcs.iter().zip(reg_tys.iter()) { - let intreg = *rc == RegClass::I64; + let intreg = *rc == RegClass::Int; let nextreg = if intreg { match args_or_rets { ArgsOrRets::Args => { @@ -232,7 +232,7 @@ impl ABIMachineSpec for X64ABIMachineSpec { next_vreg += 1; } slots.push(ABIArgSlot::Reg { - reg: reg.to_real_reg(), + reg: reg.to_real_reg().unwrap(), ty: *reg_ty, extension: param.extension, }); @@ -277,7 +277,7 @@ impl ABIMachineSpec for X64ABIMachineSpec { debug_assert!(args_or_rets == ArgsOrRets::Args); if let Some(reg) = get_intreg_for_arg(&call_conv, next_gpr, next_param_idx) { ret.push(ABIArg::reg( - reg.to_real_reg(), + reg.to_real_reg().unwrap(), types::I64, ir::ArgumentExtension::None, ir::ArgumentPurpose::Normal, @@ -357,8 +357,8 @@ impl ABIMachineSpec for X64ABIMachineSpec { } } - fn gen_ret() -> Self::I { - Inst::ret() + fn gen_ret(rets: Vec) -> Self::I { + Inst::ret(rets) } fn gen_epilogue_placeholder() -> Self::I { @@ -397,8 +397,8 @@ impl ABIMachineSpec for X64ABIMachineSpec { fn get_stacklimit_reg() -> Reg { debug_assert!( - !is_callee_save_systemv(regs::r10().to_real_reg()) - && !is_callee_save_baldrdash(regs::r10().to_real_reg()) + !is_callee_save_systemv(regs::r10().to_real_reg().unwrap()) + && !is_callee_save_baldrdash(regs::r10().to_real_reg().unwrap()) ); // As per comment on trait definition, we must return a caller-save @@ -499,7 +499,7 @@ impl ABIMachineSpec for X64ABIMachineSpec { _call_conv: isa::CallConv, setup_frame: bool, flags: &settings::Flags, - clobbered_callee_saves: &Vec>, + clobbered_callee_saves: &[Writable], fixed_frame_storage_size: u32, _outgoing_args_size: u32, ) -> (u64, SmallVec<[Self::I; 16]>) { @@ -536,25 +536,24 @@ impl ABIMachineSpec for X64ABIMachineSpec { for reg in clobbered_callee_saves { let r_reg = reg.to_reg(); let off = cur_offset; - match r_reg.get_class() { - RegClass::I64 => { + match r_reg.class() { + RegClass::Int => { insts.push(Inst::store( types::I64, - r_reg.to_reg(), + r_reg.into(), Amode::imm_reg(cur_offset, regs::rsp()), )); cur_offset += 8; } - RegClass::V128 => { + RegClass::Float => { cur_offset = align_to(cur_offset, 16); insts.push(Inst::store( types::I8X16, - r_reg.to_reg(), + r_reg.into(), Amode::imm_reg(cur_offset, regs::rsp()), )); cur_offset += 16; } - _ => unreachable!(), }; if flags.unwind_info() { insts.push(Inst::Unwind { @@ -572,7 +571,7 @@ impl ABIMachineSpec for X64ABIMachineSpec { fn gen_clobber_restore( call_conv: isa::CallConv, flags: &settings::Flags, - clobbers: &Set>, + clobbers: &[Writable], fixed_frame_storage_size: u32, _outgoing_args_size: u32, ) -> SmallVec<[Self::I; 16]> { @@ -587,25 +586,24 @@ impl ABIMachineSpec for X64ABIMachineSpec { let mut cur_offset = fixed_frame_storage_size; for reg in &clobbered_callee_saves { let rreg = reg.to_reg(); - match rreg.get_class() { - RegClass::I64 => { + match rreg.class() { + RegClass::Int => { insts.push(Inst::mov64_m_r( Amode::imm_reg(cur_offset, regs::rsp()), - Writable::from_reg(rreg.to_reg()), + Writable::from_reg(rreg.into()), )); cur_offset += 8; } 
- RegClass::V128 => { + RegClass::Float => { cur_offset = align_to(cur_offset, 16); insts.push(Inst::load( types::I8X16, Amode::imm_reg(cur_offset, regs::rsp()), - Writable::from_reg(rreg.to_reg()), + Writable::from_reg(rreg.into()), ExtKind::None, )); cur_offset += 16; } - _ => unreachable!(), } } // Adjust RSP back upward. @@ -641,34 +639,27 @@ impl ABIMachineSpec for X64ABIMachineSpec { tmp: Writable, _callee_conv: isa::CallConv, _caller_conv: isa::CallConv, - ) -> SmallVec<[(InstIsSafepoint, Self::I); 2]> { + ) -> SmallVec<[Self::I; 2]> { let mut insts = SmallVec::new(); match dest { &CallDest::ExtName(ref name, RelocDistance::Near) => { - insts.push(( - InstIsSafepoint::Yes, - Inst::call_known(name.clone(), uses, defs, opcode), - )); + insts.push(Inst::call_known(name.clone(), uses, defs, opcode)); } &CallDest::ExtName(ref name, RelocDistance::Far) => { - insts.push(( - InstIsSafepoint::No, - Inst::LoadExtName { - dst: tmp, - name: Box::new(name.clone()), - offset: 0, - }, - )); - insts.push(( - InstIsSafepoint::Yes, - Inst::call_unknown(RegMem::reg(tmp.to_reg()), uses, defs, opcode), + insts.push(Inst::LoadExtName { + dst: tmp, + name: Box::new(name.clone()), + offset: 0, + }); + insts.push(Inst::call_unknown( + RegMem::reg(tmp.to_reg()), + uses, + defs, + opcode, )); } &CallDest::Reg(reg) => { - insts.push(( - InstIsSafepoint::Yes, - Inst::call_unknown(RegMem::reg(reg), uses, defs, opcode), - )); + insts.push(Inst::call_unknown(RegMem::reg(reg), uses, defs, opcode)); } } insts @@ -722,9 +713,8 @@ impl ABIMachineSpec for X64ABIMachineSpec { fn get_number_of_spillslots_for_value(rc: RegClass) -> u32 { // We allocate in terms of 8-byte slots. match rc { - RegClass::I64 => 1, - RegClass::V128 => 2, - _ => panic!("Unexpected register class!"), + RegClass::Int => 1, + RegClass::Float => 2, } } @@ -798,7 +788,7 @@ impl ABIMachineSpec for X64ABIMachineSpec { fn get_clobbered_callee_saves( call_conv: CallConv, - regs: &Set>, + regs: &[Writable], ) -> Vec> { let mut regs: Vec> = match call_conv { CallConv::BaldrdashSystemV | CallConv::Baldrdash2020 => regs @@ -824,7 +814,7 @@ impl ABIMachineSpec for X64ABIMachineSpec { }; // Sort registers for deterministic code output. We can do an unstable sort because the // registers will be unique (there are no dups). - regs.sort_unstable_by_key(|r| r.to_reg().get_index()); + regs.sort_unstable_by_key(|r| VReg::from(r.to_reg()).vreg()); regs } @@ -981,21 +971,20 @@ fn get_fltreg_for_retval( fn is_callee_save_systemv(r: RealReg) -> bool { use regs::*; - match r.get_class() { - RegClass::I64 => match r.get_hw_encoding() as u8 { + match r.class() { + RegClass::Int => match r.hw_enc() { ENC_RBX | ENC_RBP | ENC_R12 | ENC_R13 | ENC_R14 | ENC_R15 => true, _ => false, }, - RegClass::V128 => false, - _ => unimplemented!(), + RegClass::Float => false, } } fn is_callee_save_baldrdash(r: RealReg) -> bool { use regs::*; - match r.get_class() { - RegClass::I64 => { - if r.get_hw_encoding() as u8 == ENC_R14 { + match r.class() { + RegClass::Int => { + if r.hw_enc() == ENC_R14 { // r14 is the WasmTlsReg and is preserved implicitly. 
false } else { @@ -1003,38 +992,35 @@ fn is_callee_save_baldrdash(r: RealReg) -> bool { is_callee_save_systemv(r) } } - RegClass::V128 => false, - _ => unimplemented!(), + RegClass::Float => false, } } fn is_callee_save_fastcall(r: RealReg) -> bool { use regs::*; - match r.get_class() { - RegClass::I64 => match r.get_hw_encoding() as u8 { + match r.class() { + RegClass::Int => match r.hw_enc() { ENC_RBX | ENC_RBP | ENC_RSI | ENC_RDI | ENC_R12 | ENC_R13 | ENC_R14 | ENC_R15 => true, _ => false, }, - RegClass::V128 => match r.get_hw_encoding() as u8 { + RegClass::Float => match r.hw_enc() { 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 => true, _ => false, }, - _ => panic!("Unknown register class: {:?}", r.get_class()), } } -fn compute_clobber_size(clobbers: &Vec>) -> u32 { +fn compute_clobber_size(clobbers: &[Writable]) -> u32 { let mut clobbered_size = 0; for reg in clobbers { - match reg.to_reg().get_class() { - RegClass::I64 => { + match reg.to_reg().class() { + RegClass::Int => { clobbered_size += 8; } - RegClass::V128 => { + RegClass::Float => { clobbered_size = align_to(clobbered_size, 16); clobbered_size += 16; } - _ => unreachable!(), } } align_to(clobbered_size, 16) diff --git a/cranelift/codegen/src/isa/x64/encoding/evex.rs b/cranelift/codegen/src/isa/x64/encoding/evex.rs index 80a3c86bda..117a196f13 100644 --- a/cranelift/codegen/src/isa/x64/encoding/evex.rs +++ b/cranelift/codegen/src/isa/x64/encoding/evex.rs @@ -369,8 +369,8 @@ mod tests { .map(OpcodeMap::_0F38) .w(true) .opcode(0x1F) - .reg(dst.get_hw_encoding()) - .rm(src.get_hw_encoding()) + .reg(dst.to_real_reg().unwrap().hw_enc()) + .rm(src.to_real_reg().unwrap().hw_enc()) .length(EvexVectorLength::V128) .encode(&mut sink0); @@ -393,8 +393,8 @@ mod tests { .map(OpcodeMap::None) .w(false) .opcode(0x00) - .reg(regs::rax().get_hw_encoding()) - .rm(regs::rax().get_hw_encoding()) + .reg(regs::rax().to_real_reg().unwrap().hw_enc()) + .rm(regs::rax().to_real_reg().unwrap().hw_enc()) .mask(EvexMasking::None) .encode(&mut sink1); diff --git a/cranelift/codegen/src/isa/x64/encoding/rex.rs b/cranelift/codegen/src/isa/x64/encoding/rex.rs index 509309205d..fba1007c95 100644 --- a/cranelift/codegen/src/isa/x64/encoding/rex.rs +++ b/cranelift/codegen/src/isa/x64/encoding/rex.rs @@ -8,6 +8,7 @@ //! operand ("G" in Intelese), the order is always G first, then E. The term "enc" in the following //! means "hardware register encoding number". +use crate::machinst::{Reg, RegClass}; use crate::{ ir::TrapCode, isa::x64::inst::{ @@ -16,7 +17,6 @@ use crate::{ }, machinst::MachBuffer, }; -use regalloc::{Reg, RegClass}; pub(crate) fn low8_will_sign_extend_to_64(x: u32) -> bool { let xs = (x as i32) as i64; @@ -50,8 +50,8 @@ pub(crate) fn encode_sib(shift: u8, enc_index: u8, enc_base: u8) -> u8 { pub(crate) fn int_reg_enc(reg: impl Into) -> u8 { let reg = reg.into(); debug_assert!(reg.is_real()); - debug_assert_eq!(reg.get_class(), RegClass::I64); - reg.get_hw_encoding() + debug_assert_eq!(reg.class(), RegClass::Int); + reg.to_real_reg().unwrap().hw_enc() } /// Get the encoding number of any register. 
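
Since `compute_clobber_size` above encodes the x64 clobber-frame layout rule in a loop, here is the same rule as a tiny standalone function with a worked example (a sketch; only `RegClass` is taken from the PR):

    use crate::machinst::RegClass;

    // 8 bytes per clobbered GPR; 16 bytes per clobbered XMM, aligned to 16;
    // total rounded up to 16.
    fn clobber_area_size(clobber_classes: &[RegClass]) -> u32 {
        fn align_to(x: u32, a: u32) -> u32 { (x + a - 1) & !(a - 1) }
        let mut size = 0;
        for rc in clobber_classes {
            match rc {
                RegClass::Int => size += 8,
                RegClass::Float => {
                    size = align_to(size, 16);
                    size += 16;
                }
            }
        }
        align_to(size, 16)
    }

    // e.g. [Int, Float, Int]: 8, then align to 16 and +16 = 32, then +8 = 40,
    // rounded up to a 48-byte clobber area.
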
@@ -59,7 +59,7 @@ pub(crate) fn int_reg_enc(reg: impl Into) -> u8 { pub(crate) fn reg_enc(reg: impl Into) -> u8 { let reg = reg.into(); debug_assert!(reg.is_real()); - reg.get_hw_encoding() + reg.to_real_reg().unwrap().hw_enc() } /// A small bit field to record a REX prefix specification: diff --git a/cranelift/codegen/src/isa/x64/inst.isle b/cranelift/codegen/src/isa/x64/inst.isle index b4fdbde172..340443c7d8 100644 --- a/cranelift/codegen/src/isa/x64/inst.isle +++ b/cranelift/codegen/src/isa/x64/inst.isle @@ -41,7 +41,8 @@ (Div (size OperandSize) ;; 1, 2, 4, or 8 (signed bool) (divisor GprMem) - (dividend Gpr) + (dividend_lo Gpr) + (dividend_hi Gpr) (dst_quotient WritableGpr) (dst_remainder WritableGpr)) @@ -69,7 +70,8 @@ ;; regalloc failures where %rdx is live before its first def! (CheckedDivOrRemSeq (kind DivOrRemKind) (size OperandSize) - (dividend Gpr) + (dividend_lo Gpr) + (dividend_hi Gpr) ;; The divisor operand. Note it's marked as modified ;; so that it gets assigned a register different from ;; the temporary. @@ -318,7 +320,7 @@ (opcode Opcode)) ;; Return. - (Ret) + (Ret (rets VecReg)) ;; A placeholder instruction, generating no code, meaning that a function ;; epilogue must be inserted there. @@ -476,13 +478,12 @@ ;; `rax`. (MachOTlsGetAddr (symbol ExternalName)) - ;; A definition of a value label. - (ValueLabelMarker (reg Reg) - (label ValueLabel)) - ;; An unwind pseudoinstruction describing the state of the machine at ;; this program point. - (Unwind (inst UnwindInst)))) + (Unwind (inst UnwindInst)) + + ;; A pseudoinstruction that just keeps a value alive. + (DummyUse (reg Reg)))) (type OperandSize extern (enum Size8 diff --git a/cranelift/codegen/src/isa/x64/inst/args.rs b/cranelift/codegen/src/isa/x64/inst/args.rs index 6ad2e0111e..cae4957b4c 100644 --- a/cranelift/codegen/src/isa/x64/inst/args.rs +++ b/cranelift/codegen/src/isa/x64/inst/args.rs @@ -1,14 +1,13 @@ //! Instruction operand sub-components (aka "parts"): definitions and printing. -use super::regs::{self, show_ireg_sized}; +use super::regs::{self}; use super::EmitState; use crate::ir::condcodes::{FloatCC, IntCC}; use crate::ir::{MemFlags, Type}; +use crate::isa::x64::inst::regs::pretty_print_reg; use crate::isa::x64::inst::Inst; use crate::machinst::*; -use regalloc::{ - PrettyPrint, PrettyPrintSized, RealRegUniverse, Reg, RegClass, RegUsageCollector, Writable, -}; +use regalloc2::VReg; use smallvec::{smallvec, SmallVec}; use std::fmt; use std::string::String; @@ -23,25 +22,6 @@ pub trait FromWritableReg: Sized { fn from_writable_reg(w: Writable) -> Option; } -/// An extension trait for mapping register uses on `{Xmm,Gpr}`. -pub trait MapUseExt { - fn map_use(&mut self, mapper: &RM) - where - RM: RegMapper; -} - -/// An extension trait for mapping register mods and defs on -/// `Writable{Xmm,Gpr}`. -pub trait MapDefModExt { - fn map_def(&mut self, mapper: &RM) - where - RM: RegMapper; - - fn map_mod(&mut self, mapper: &RM) - where - RM: RegMapper; -} - /// A macro for defining a newtype of `Reg` that enforces some invariant about /// the wrapped `Reg` (such as that it is of a particular register class). macro_rules! newtype_of_reg { @@ -55,7 +35,7 @@ macro_rules! newtype_of_reg { |$check_reg:ident| $check:expr ) => { /// A newtype wrapper around `Reg`. - #[derive(Clone, Copy, Debug, PartialEq, Eq, Hash, PartialOrd, Ord)] + #[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)] pub struct $newtype_reg(Reg); impl PartialEq for $newtype_reg { @@ -70,12 +50,6 @@ macro_rules! 
newtype_of_reg { } } - impl PrettyPrint for $newtype_reg { - fn show_rru(&self, mb_rru: Option<&RealRegUniverse>) -> String { - self.0.show_rru(mb_rru) - } - } - impl $newtype_reg { /// Create this newtype from the given register, or return `None` if the register /// is not a valid instance of this newtype. @@ -107,21 +81,6 @@ macro_rules! newtype_of_reg { } } - impl MapUseExt for $newtype_reg { - fn map_use(&mut self, mapper: &RM) - where - RM: RegMapper, - { - let mut reg = self.0; - mapper.map_use(&mut reg); - debug_assert!({ - let $check_reg = reg; - $check - }); - *self = $newtype_reg(reg); - } - } - pub type $newtype_writable_reg = Writable<$newtype_reg>; #[allow(dead_code)] // Used by some newtypes and not others. @@ -139,34 +98,6 @@ macro_rules! newtype_of_reg { } } - impl MapDefModExt for $newtype_writable_reg { - fn map_def(&mut self, mapper: &RM) - where - RM: RegMapper, - { - let mut reg = self.to_writable_reg(); - mapper.map_def(&mut reg); - debug_assert!({ - let $check_reg = reg.to_reg(); - $check - }); - *self = Writable::from_reg($newtype_reg(reg.to_reg())); - } - - fn map_mod(&mut self, mapper: &RM) - where - RM: RegMapper, - { - let mut reg = self.to_writable_reg(); - mapper.map_mod(&mut reg); - debug_assert!({ - let $check_reg = reg.to_reg(); - $check - }); - *self = Writable::from_reg($newtype_reg(reg.to_reg())); - } - } - /// A newtype wrapper around `RegMem` for general-purpose registers. #[derive(Clone, Debug)] pub struct $newtype_reg_mem(RegMem); @@ -201,44 +132,16 @@ macro_rules! newtype_of_reg { } #[allow(dead_code)] // Used by some newtypes and not others. - pub fn map_uses(&mut self, mapper: &RM) - where - RM: RegMapper, - { - self.0.map_uses(mapper); - debug_assert!(match self.0 { - RegMem::Reg { reg: $check_reg } => $check, - _ => true, - }); - } - - #[allow(dead_code)] // Used by some newtypes and not others. - pub fn map_as_def(&mut self, mapper: &RM) - where - RM: RegMapper, - { - self.0.map_as_def(mapper); - debug_assert!(match self.0 { - RegMem::Reg { reg: $check_reg } => $check, - _ => true, - }); - } - - #[allow(dead_code)] // Used by some newtypes and not others. - pub fn get_regs_as_uses(&self, collector: &mut RegUsageCollector) { - self.0.get_regs_as_uses(collector); + pub fn get_operands VReg>( + &self, + collector: &mut OperandCollector<'_, F>, + ) { + self.0.get_operands(collector); } } - impl PrettyPrint for $newtype_reg_mem { - fn show_rru(&self, mb_rru: Option<&RealRegUniverse>) -> String { - self.0.show_rru(mb_rru) - } - } - - impl PrettyPrintSized for $newtype_reg_mem { - fn show_rru_sized(&self, mb_rru: Option<&RealRegUniverse>, size: u8) -> String { - self.0.show_rru_sized(mb_rru, size) + fn pretty_print(&self, size: u8, allocs: &mut AllocationConsumer<'_>) -> String { + self.0.pretty_print(size, allocs) } } @@ -278,44 +181,17 @@ macro_rules! newtype_of_reg { } #[allow(dead_code)] // Used by some newtypes and not others. - pub fn map_uses(&mut self, mapper: &RM) - where - RM: RegMapper, - { - self.0.map_uses(mapper); - debug_assert!(match self.0 { - RegMemImm::Reg { reg: $check_reg } => $check, - _ => true, - }); - } - - #[allow(dead_code)] // Used by some newtypes and not others. - pub fn map_as_def(&mut self, mapper: &RM) - where - RM: RegMapper, - { - self.0.map_as_def(mapper); - debug_assert!(match self.0 { - RegMemImm::Reg { reg: $check_reg } => $check, - _ => true, - }); - } - - #[allow(dead_code)] // Used by some newtypes and not others. 
- pub fn get_regs_as_uses(&self, collector: &mut RegUsageCollector) { - self.0.get_regs_as_uses(collector); + pub fn get_operands VReg>( + &self, + collector: &mut OperandCollector<'_, F>, + ) { + self.0.get_operands(collector); } } impl PrettyPrint for $newtype_reg_mem_imm { - fn show_rru(&self, mb_rru: Option<&RealRegUniverse>) -> String { - self.0.show_rru(mb_rru) - } - } - - impl PrettyPrintSized for $newtype_reg_mem_imm { - fn show_rru_sized(&self, mb_rru: Option<&RealRegUniverse>, size: u8) -> String { - self.0.show_rru_sized(mb_rru, size) + fn pretty_print(&self, size: u8, allocs: &mut AllocationConsumer<'_>) -> String { + self.0.pretty_print(size, allocs) } } @@ -359,7 +235,7 @@ newtype_of_reg!( GprMem, GprMemImm, Imm8Gpr, - |reg| reg.get_class() == RegClass::I64 + |reg| reg.class() == RegClass::Int ); // Define a newtype of `Reg` for XMM registers. @@ -370,7 +246,7 @@ newtype_of_reg!( XmmMem, XmmMemImm, Imm8Xmm, - |reg| reg.get_class() == RegClass::V128 + |reg| reg.class() == RegClass::Float ); /// A possible addressing mode (amode) that can be used in instructions. @@ -400,7 +276,7 @@ pub enum Amode { impl Amode { pub(crate) fn imm_reg(simm32: u32, base: Reg) -> Self { - debug_assert!(base.get_class() == RegClass::I64); + debug_assert!(base.class() == RegClass::Int); Self::ImmReg { simm32, base, @@ -409,8 +285,8 @@ impl Amode { } pub(crate) fn imm_reg_reg_shift(simm32: u32, base: Gpr, index: Gpr, shift: u8) -> Self { - debug_assert!(base.get_class() == RegClass::I64); - debug_assert!(index.get_class() == RegClass::I64); + debug_assert!(base.class() == RegClass::Int); + debug_assert!(index.class() == RegClass::Int); debug_assert!(shift <= 3); Self::ImmRegRegShift { simm32, @@ -450,14 +326,17 @@ impl Amode { } /// Add the regs mentioned by `self` to `collector`. - pub(crate) fn get_regs_as_uses(&self, collector: &mut RegUsageCollector) { + pub(crate) fn get_operands VReg>( + &self, + collector: &mut OperandCollector<'_, F>, + ) { match self { Amode::ImmReg { base, .. } => { - collector.add_use(*base); + collector.reg_use(*base); } Amode::ImmRegRegShift { base, index, .. } => { - collector.add_use(base.to_reg()); - collector.add_use(index.to_reg()); + collector.reg_use(base.to_reg()); + collector.reg_use(index.to_reg()); } Amode::RipRelative { .. } => { // RIP isn't involved in regalloc. @@ -476,13 +355,56 @@ impl Amode { pub(crate) fn can_trap(&self) -> bool { !self.get_flags().notrap() } + + pub(crate) fn with_allocs(&self, allocs: &mut AllocationConsumer<'_>) -> Self { + // The order in which we consume allocs here must match the + // order in which we produce operands in get_operands() above. + match self { + &Amode::ImmReg { + simm32, + base, + flags, + } => Amode::ImmReg { + simm32, + flags, + base: allocs.next(base), + }, + &Amode::ImmRegRegShift { + simm32, + base, + index, + shift, + flags, + } => Amode::ImmRegRegShift { + simm32, + shift, + flags, + base: Gpr::new(allocs.next(*base)).unwrap(), + index: Gpr::new(allocs.next(*index)).unwrap(), + }, + &Amode::RipRelative { target } => Amode::RipRelative { target }, + } + } + + /// Offset the amode by a fixed offset. + pub(crate) fn offset(&self, offset: u32) -> Self { + let mut ret = self.clone(); + match &mut ret { + &mut Amode::ImmReg { ref mut simm32, .. } => *simm32 += offset, + &mut Amode::ImmRegRegShift { ref mut simm32, .. 
} => *simm32 += offset, + _ => panic!("Cannot offset amode: {:?}", self), + } + ret + } } impl PrettyPrint for Amode { - fn show_rru(&self, mb_rru: Option<&RealRegUniverse>) -> String { + fn pretty_print(&self, _size: u8, allocs: &mut AllocationConsumer<'_>) -> String { match self { Amode::ImmReg { simm32, base, .. } => { - format!("{}({})", *simm32 as i32, base.show_rru(mb_rru)) + // Note: size is always 8; the address is 64 bits, + // even if the addressed operand is smaller. + format!("{}({})", *simm32 as i32, pretty_print_reg(*base, 8, allocs)) } Amode::ImmRegRegShift { simm32, @@ -493,8 +415,8 @@ impl PrettyPrint for Amode { } => format!( "{}({},{},{})", *simm32 as i32, - base.show_rru(mb_rru), - index.show_rru(mb_rru), + pretty_print_reg(base.to_reg(), 8, allocs), + pretty_print_reg(index.to_reg(), 8, allocs), 1 << shift ), Amode::RipRelative { ref target } => format!("label{}(%rip)", target.get()), @@ -524,9 +446,12 @@ impl SyntheticAmode { } /// Add the regs mentioned by `self` to `collector`. - pub(crate) fn get_regs_as_uses(&self, collector: &mut RegUsageCollector) { + pub(crate) fn get_operands VReg>( + &self, + collector: &mut OperandCollector<'_, F>, + ) { match self { - SyntheticAmode::Real(addr) => addr.get_regs_as_uses(collector), + SyntheticAmode::Real(addr) => addr.get_operands(collector), SyntheticAmode::NominalSPOffset { .. } => { // Nothing to do; the base is SP and isn't involved in regalloc. } @@ -534,16 +459,6 @@ impl SyntheticAmode { } } - pub(crate) fn map_uses(&mut self, map: &RM) { - match self { - SyntheticAmode::Real(addr) => addr.map_uses(map), - SyntheticAmode::NominalSPOffset { .. } => { - // Nothing to do. - } - SyntheticAmode::ConstantOffset(_) => {} - } - } - pub(crate) fn finalize(&self, state: &mut EmitState, buffer: &MachBuffer) -> Amode { match self { SyntheticAmode::Real(addr) => addr.clone(), @@ -561,6 +476,15 @@ impl SyntheticAmode { } } } + + pub(crate) fn with_allocs(&self, allocs: &mut AllocationConsumer<'_>) -> Self { + match self { + SyntheticAmode::Real(addr) => SyntheticAmode::Real(addr.with_allocs(allocs)), + &SyntheticAmode::NominalSPOffset { .. } | &SyntheticAmode::ConstantOffset { .. } => { + self.clone() + } + } + } } impl Into for Amode { @@ -570,9 +494,10 @@ impl Into for Amode { } impl PrettyPrint for SyntheticAmode { - fn show_rru(&self, mb_rru: Option<&RealRegUniverse>) -> String { + fn pretty_print(&self, _size: u8, allocs: &mut AllocationConsumer<'_>) -> String { match self { - SyntheticAmode::Real(addr) => addr.show_rru(mb_rru), + // See note in `Amode` regarding constant size of `8`. + SyntheticAmode::Real(addr) => addr.pretty_print(8, allocs), SyntheticAmode::NominalSPOffset { simm32 } => { format!("rsp({} + virtual offset)", *simm32 as i32) } @@ -594,7 +519,7 @@ pub enum RegMemImm { impl RegMemImm { pub(crate) fn reg(reg: Reg) -> Self { - debug_assert!(reg.get_class() == RegClass::I64 || reg.get_class() == RegClass::V128); + debug_assert!(reg.class() == RegClass::Int || reg.class() == RegClass::Float); Self::Reg { reg } } pub(crate) fn mem(addr: impl Into) -> Self { @@ -607,15 +532,18 @@ impl RegMemImm { /// Asserts that in register mode, the reg class is the one that's expected. pub(crate) fn assert_regclass_is(&self, expected_reg_class: RegClass) { if let Self::Reg { reg } = self { - debug_assert_eq!(reg.get_class(), expected_reg_class); + debug_assert_eq!(reg.class(), expected_reg_class); } } /// Add the regs mentioned by `self` to `collector`. 
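
The comment in `with_allocs` above states the load-bearing invariant of this whole PR: regalloc2 hands allocations back as a flat per-instruction list, so the order in which `with_allocs` (or `emit`) consumes them must exactly match the order in which `get_operands` produced them. A toy two-register operand, using this PR's `OperandCollector` and `AllocationConsumer` types, makes the pairing concrete:

    use crate::machinst::{AllocationConsumer, OperandCollector, Reg};
    use regalloc2::VReg;

    struct ToyAmode {
        base: Reg,
        index: Reg,
    }

    impl ToyAmode {
        fn get_operands<F: Fn(VReg) -> VReg>(&self, collector: &mut OperandCollector<'_, F>) {
            collector.reg_use(self.base); // produces operand 0
            collector.reg_use(self.index); // produces operand 1
        }

        fn with_allocs(&self, allocs: &mut AllocationConsumer<'_>) -> Self {
            ToyAmode {
                base: allocs.next(self.base),   // consumes operand 0
                index: allocs.next(self.index), // consumes operand 1
            }
        }
    }
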
- pub(crate) fn get_regs_as_uses(&self, collector: &mut RegUsageCollector) { + pub(crate) fn get_operands VReg>( + &self, + collector: &mut OperandCollector<'_, F>, + ) { match self { - Self::Reg { reg } => collector.add_use(*reg), - Self::Mem { addr } => addr.get_regs_as_uses(collector), + Self::Reg { reg } => collector.reg_use(*reg), + Self::Mem { addr } => addr.get_operands(collector), Self::Imm { .. } => {} } } @@ -626,19 +554,25 @@ impl RegMemImm { _ => None, } } -} -impl PrettyPrint for RegMemImm { - fn show_rru(&self, mb_rru: Option<&RealRegUniverse>) -> String { - self.show_rru_sized(mb_rru, 8) + pub(crate) fn with_allocs(&self, allocs: &mut AllocationConsumer<'_>) -> Self { + match self { + Self::Reg { reg } => Self::Reg { + reg: allocs.next(*reg), + }, + Self::Mem { addr } => Self::Mem { + addr: addr.with_allocs(allocs), + }, + Self::Imm { .. } => self.clone(), + } } } -impl PrettyPrintSized for RegMemImm { - fn show_rru_sized(&self, mb_rru: Option<&RealRegUniverse>, size: u8) -> String { +impl PrettyPrint for RegMemImm { + fn pretty_print(&self, size: u8, allocs: &mut AllocationConsumer<'_>) -> String { match self { - Self::Reg { reg } => show_ireg_sized(*reg, mb_rru, size), - Self::Mem { addr } => addr.show_rru(mb_rru), + Self::Reg { reg } => pretty_print_reg(*reg, size, allocs), + Self::Mem { addr } => addr.pretty_print(size, allocs), Self::Imm { simm32 } => format!("${}", *simm32 as i32), } } @@ -673,7 +607,7 @@ pub enum RegMem { impl RegMem { pub(crate) fn reg(reg: Reg) -> Self { - debug_assert!(reg.get_class() == RegClass::I64 || reg.get_class() == RegClass::V128); + debug_assert!(reg.class() == RegClass::Int || reg.class() == RegClass::Float); Self::Reg { reg } } pub(crate) fn mem(addr: impl Into) -> Self { @@ -682,14 +616,17 @@ impl RegMem { /// Asserts that in register mode, the reg class is the one that's expected. pub(crate) fn assert_regclass_is(&self, expected_reg_class: RegClass) { if let Self::Reg { reg } = self { - debug_assert_eq!(reg.get_class(), expected_reg_class); + debug_assert_eq!(reg.class(), expected_reg_class); } } /// Add the regs mentioned by `self` to `collector`. - pub(crate) fn get_regs_as_uses(&self, collector: &mut RegUsageCollector) { + pub(crate) fn get_operands VReg>( + &self, + collector: &mut OperandCollector<'_, F>, + ) { match self { - RegMem::Reg { reg } => collector.add_use(*reg), - RegMem::Mem { addr, .. } => addr.get_regs_as_uses(collector), + RegMem::Reg { reg } => collector.reg_use(*reg), + RegMem::Mem { addr, .. } => addr.get_operands(collector), } } pub(crate) fn to_reg(&self) -> Option { @@ -698,6 +635,17 @@ impl RegMem { _ => None, } } + + pub(crate) fn with_allocs(&self, allocs: &mut AllocationConsumer<'_>) -> Self { + match self { + RegMem::Reg { reg } => RegMem::Reg { + reg: allocs.next(*reg), + }, + RegMem::Mem { addr } => RegMem::Mem { + addr: addr.with_allocs(allocs), + }, + } + } } impl From> for RegMem { @@ -707,16 +655,10 @@ impl From> for RegMem { } impl PrettyPrint for RegMem { - fn show_rru(&self, mb_rru: Option<&RealRegUniverse>) -> String { - self.show_rru_sized(mb_rru, 8) - } -} - -impl PrettyPrintSized for RegMem { - fn show_rru_sized(&self, mb_rru: Option<&RealRegUniverse>, size: u8) -> String { + fn pretty_print(&self, size: u8, allocs: &mut AllocationConsumer<'_>) -> String { match self { - RegMem::Reg { reg } => show_ireg_sized(*reg, mb_rru, size), - RegMem::Mem { addr, .. } => addr.show_rru(mb_rru), + RegMem::Reg { reg } => pretty_print_reg(*reg, size, allocs), + RegMem::Mem { addr, .. 
} => addr.pretty_print(size, allocs), } } } @@ -1222,6 +1164,22 @@ impl SseOpcode { _ => 8, } } + + /// Does an XmmRmmRImm with this opcode use src1? FIXME: split + /// into separate instructions. + pub(crate) fn uses_src1(&self) -> bool { + match self { + SseOpcode::Pextrb => false, + SseOpcode::Pextrw => false, + SseOpcode::Pextrd => false, + SseOpcode::Pshufd => false, + SseOpcode::Roundss => false, + SseOpcode::Roundsd => false, + SseOpcode::Roundps => false, + SseOpcode::Roundpd => false, + _ => true, + } + } } impl fmt::Debug for SseOpcode { diff --git a/cranelift/codegen/src/isa/x64/inst/emit.rs b/cranelift/codegen/src/isa/x64/inst/emit.rs index 2ff056625c..0f98010978 100644 --- a/cranelift/codegen/src/isa/x64/inst/emit.rs +++ b/cranelift/codegen/src/isa/x64/inst/emit.rs @@ -10,15 +10,15 @@ use crate::isa::x64::encoding::rex::{ }; use crate::isa::x64::inst::args::*; use crate::isa::x64::inst::*; -use crate::machinst::{inst_common, MachBuffer, MachInstEmit, MachLabel}; +use crate::machinst::{inst_common, MachBuffer, MachInstEmit, MachLabel, Reg, Writable}; use core::convert::TryInto; -use regalloc::{Reg, Writable}; /// A small helper to generate a signed conversion instruction. fn emit_signed_cvt( sink: &mut MachBuffer, info: &EmitInfo, state: &mut EmitState, + // Required to be RealRegs. src: Reg, dst: Writable, to_f64: bool, @@ -31,7 +31,7 @@ fn emit_signed_cvt( SseOpcode::Cvtsi2ss }; let inst = Inst::gpr_to_xmm(op, RegMem::reg(src), OperandSize::Size64, dst); - inst.emit(sink, info, state); + inst.emit(&[], sink, info, state); } /// Emits a one way conditional jump if CC is set (true). @@ -110,6 +110,7 @@ fn emit_reloc( /// care?) pub(crate) fn emit( inst: &Inst, + allocs: &mut AllocationConsumer<'_>, sink: &mut MachBuffer, info: &EmitInfo, state: &mut EmitState, @@ -151,22 +152,24 @@ pub(crate) fn emit( src2, dst: reg_g, } => { - debug_assert_eq!(*src1, reg_g.to_reg()); + let (reg_g, src2) = if inst.produces_const() { + let reg_g = allocs.next(reg_g.to_reg().to_reg()); + (reg_g, RegMemImm::reg(reg_g)) + } else { + let src1 = allocs.next(src1.to_reg()); + let reg_g = allocs.next(reg_g.to_reg().to_reg()); + debug_assert_eq!(src1, reg_g); + let src2 = src2.clone().to_reg_mem_imm().with_allocs(allocs); + (reg_g, src2) + }; + let mut rex = RexFlags::from(*size); if *op == AluRmiROpcode::Mul { // We kinda freeloaded Mul into RMI_R_Op, but it doesn't fit the usual pattern, so // we have to special-case it. - match src2.clone().to_reg_mem_imm() { + match src2 { RegMemImm::Reg { reg: reg_e } => { - emit_std_reg_reg( - sink, - LegacyPrefixes::None, - 0x0FAF, - 2, - reg_g.to_reg().to_reg(), - reg_e, - rex, - ); + emit_std_reg_reg(sink, LegacyPrefixes::None, 0x0FAF, 2, reg_g, reg_e, rex); } RegMemImm::Mem { addr } => { @@ -178,7 +181,7 @@ pub(crate) fn emit( LegacyPrefixes::None, 0x0FAF, 2, - reg_g.to_reg().to_reg(), + reg_g, &amode, rex, ); @@ -188,15 +191,7 @@ pub(crate) fn emit( let use_imm8 = low8_will_sign_extend_to_32(simm32); let opcode = if use_imm8 { 0x6B } else { 0x69 }; // Yes, really, reg_g twice. 
- emit_std_reg_reg( - sink, - LegacyPrefixes::None, - opcode, - 1, - reg_g.to_reg().to_reg(), - reg_g.to_reg().to_reg(), - rex, - ); + emit_std_reg_reg(sink, LegacyPrefixes::None, opcode, 1, reg_g, reg_g, rex); emit_simm(sink, if use_imm8 { 1 } else { 4 }, simm32); } } @@ -215,11 +210,11 @@ pub(crate) fn emit( }; assert!(!(is_8bit && *size == OperandSize::Size64)); - match src2.clone().to_reg_mem_imm() { + match src2 { RegMemImm::Reg { reg: reg_e } => { if is_8bit { rex.always_emit_if_8bit_needed(reg_e); - rex.always_emit_if_8bit_needed(reg_g.to_reg().to_reg()); + rex.always_emit_if_8bit_needed(reg_g); } // GCC/llvm use the swapped operand encoding (viz., the R/RM vs RM/R // duality). Do this too, so as to be able to compare generated machine @@ -230,17 +225,17 @@ pub(crate) fn emit( opcode_r, 1, reg_e, - reg_g.to_reg().to_reg(), + reg_g, rex, ); } RegMemImm::Mem { addr } => { + let amode = addr.finalize(state, sink); if is_8bit { - rex.always_emit_if_8bit_needed(reg_g.to_reg().to_reg()); + rex.always_emit_if_8bit_needed(reg_g); } // Here we revert to the "normal" G-E ordering. - let amode = addr.finalize(state, sink); emit_std_reg_mem( sink, state, @@ -248,7 +243,7 @@ pub(crate) fn emit( LegacyPrefixes::None, opcode_m, 1, - reg_g.to_reg().to_reg(), + reg_g, &amode, rex, ); @@ -259,7 +254,7 @@ pub(crate) fn emit( let use_imm8 = low8_will_sign_extend_to_32(simm32); let opcode = if use_imm8 { 0x83 } else { 0x81 }; // And also here we use the "normal" G-E ordering. - let enc_g = int_reg_enc(reg_g.to_reg().to_reg()); + let enc_g = int_reg_enc(reg_g); emit_std_enc_enc( sink, LegacyPrefixes::None, @@ -276,6 +271,7 @@ pub(crate) fn emit( } Inst::UnaryRmR { size, op, src, dst } => { + let dst = allocs.next(dst.to_reg().to_reg()); let rex_flags = RexFlags::from(*size); use UnaryRmROpcode::*; let prefix = match size { @@ -299,17 +295,12 @@ pub(crate) fn emit( }; match src.clone().into() { - RegMem::Reg { reg: src } => emit_std_reg_reg( - sink, - prefix, - opcode, - num_opcodes, - dst.to_reg().to_reg(), - src, - rex_flags, - ), + RegMem::Reg { reg: src } => { + let src = allocs.next(src); + emit_std_reg_reg(sink, prefix, opcode, num_opcodes, dst, src, rex_flags); + } RegMem::Mem { addr: src } => { - let amode = src.finalize(state, sink); + let amode = src.finalize(state, sink).with_allocs(allocs); emit_std_reg_mem( sink, state, @@ -317,7 +308,7 @@ pub(crate) fn emit( prefix, opcode, num_opcodes, - dst.to_reg().to_reg(), + dst, &amode, rex_flags, ); @@ -326,8 +317,10 @@ pub(crate) fn emit( } Inst::Not { size, src, dst } => { - debug_assert_eq!(*src, dst.to_reg()); - let rex_flags = RexFlags::from((*size, dst.to_writable_reg().to_reg())); + let src = allocs.next(src.to_reg()); + let dst = allocs.next(dst.to_reg().to_reg()); + debug_assert_eq!(src, dst); + let rex_flags = RexFlags::from((*size, dst)); let (opcode, prefix) = match size { OperandSize::Size8 => (0xF6, LegacyPrefixes::None), OperandSize::Size16 => (0xF7, LegacyPrefixes::_66), @@ -336,13 +329,15 @@ pub(crate) fn emit( }; let subopcode = 2; - let enc_src = int_reg_enc(dst.to_reg()); + let enc_src = int_reg_enc(dst); emit_std_enc_enc(sink, prefix, opcode, 1, subopcode, enc_src, rex_flags) } Inst::Neg { size, src, dst } => { - debug_assert_eq!(*src, dst.to_reg()); - let rex_flags = RexFlags::from((*size, dst.to_writable_reg().to_reg())); + let src = allocs.next(src.to_reg()); + let dst = allocs.next(dst.to_reg().to_reg()); + debug_assert_eq!(src, dst); + let rex_flags = RexFlags::from((*size, dst)); let (opcode, prefix) = match size { 
OperandSize::Size8 => (0xF6, LegacyPrefixes::None), OperandSize::Size16 => (0xF7, LegacyPrefixes::_66), @@ -351,21 +346,28 @@ pub(crate) fn emit( }; let subopcode = 3; - let enc_src = int_reg_enc(dst.to_reg()); + let enc_src = int_reg_enc(dst); emit_std_enc_enc(sink, prefix, opcode, 1, subopcode, enc_src, rex_flags) } Inst::Div { size, signed, - dividend, + dividend_lo, + dividend_hi, divisor, dst_quotient, dst_remainder, } => { - debug_assert_eq!(*dividend, regs::rax()); - debug_assert_eq!(dst_quotient.to_reg(), regs::rax()); - debug_assert_eq!(dst_remainder.to_reg(), regs::rdx()); + let dividend_lo = allocs.next(dividend_lo.to_reg()); + let dividend_hi = allocs.next(dividend_hi.to_reg()); + let dst_quotient = allocs.next(dst_quotient.to_reg().to_reg()); + let dst_remainder = allocs.next(dst_remainder.to_reg().to_reg()); + debug_assert_eq!(dividend_lo, regs::rax()); + debug_assert_eq!(dividend_hi, regs::rdx()); + debug_assert_eq!(dst_quotient, regs::rax()); + debug_assert_eq!(dst_remainder, regs::rdx()); + let (opcode, prefix) = match size { OperandSize::Size8 => (0xF6, LegacyPrefixes::None), OperandSize::Size16 => (0xF7, LegacyPrefixes::_66), @@ -379,6 +381,7 @@ pub(crate) fn emit( let subopcode = if *signed { 7 } else { 6 }; match divisor.clone().to_reg_mem() { RegMem::Reg { reg } => { + let reg = allocs.next(reg); let src = int_reg_enc(reg); emit_std_enc_enc( sink, @@ -391,7 +394,7 @@ pub(crate) fn emit( ) } RegMem::Mem { addr: src } => { - let amode = src.finalize(state, sink); + let amode = src.finalize(state, sink).with_allocs(allocs); emit_std_enc_mem( sink, state, @@ -415,9 +418,12 @@ pub(crate) fn emit( dst_lo, dst_hi, } => { - debug_assert_eq!(*src1, regs::rax()); - debug_assert_eq!(dst_lo.to_reg(), regs::rax()); - debug_assert_eq!(dst_hi.to_reg(), regs::rdx()); + let src1 = allocs.next(src1.to_reg()); + let dst_lo = allocs.next(dst_lo.to_reg().to_reg()); + let dst_hi = allocs.next(dst_hi.to_reg().to_reg()); + debug_assert_eq!(src1, regs::rax()); + debug_assert_eq!(dst_lo, regs::rax()); + debug_assert_eq!(dst_hi, regs::rdx()); let rex_flags = RexFlags::from(*size); let prefix = match size { @@ -430,11 +436,12 @@ pub(crate) fn emit( let subopcode = if *signed { 5 } else { 4 }; match src2.clone().to_reg_mem() { RegMem::Reg { reg } => { + let reg = allocs.next(reg); let src = int_reg_enc(reg); emit_std_enc_enc(sink, prefix, 0xF7, 1, subopcode, src, rex_flags) } RegMem::Mem { addr: src } => { - let amode = src.finalize(state, sink); + let amode = src.finalize(state, sink).with_allocs(allocs); emit_std_enc_mem( sink, state, info, prefix, 0xF7, 1, subopcode, &amode, rex_flags, ); @@ -443,8 +450,10 @@ pub(crate) fn emit( } Inst::SignExtendData { size, src, dst } => { - debug_assert_eq!(*src, regs::rax()); - debug_assert_eq!(dst.to_reg(), regs::rdx()); + let src = allocs.next(src.to_reg()); + let dst = allocs.next(dst.to_reg().to_reg()); + debug_assert_eq!(src, regs::rax()); + debug_assert_eq!(dst, regs::rdx()); match size { OperandSize::Size8 => { sink.put1(0x66); @@ -465,15 +474,23 @@ pub(crate) fn emit( Inst::CheckedDivOrRemSeq { kind, size, - dividend, + dividend_lo, + dividend_hi, divisor, tmp, dst_quotient, dst_remainder, } => { - debug_assert_eq!(*dividend, regs::rax()); - debug_assert_eq!(dst_quotient.to_reg(), regs::rax()); - debug_assert_eq!(dst_remainder.to_reg(), regs::rdx()); + let dividend_lo = allocs.next(dividend_lo.to_reg()); + let dividend_hi = allocs.next(dividend_hi.to_reg()); + let divisor = allocs.next(divisor.to_reg().to_reg()); + let dst_quotient = 
allocs.next(dst_quotient.to_reg().to_reg()); + let dst_remainder = allocs.next(dst_remainder.to_reg().to_reg()); + let tmp = tmp.map(|tmp| allocs.next(tmp.to_reg().to_reg())); + debug_assert_eq!(dividend_lo, regs::rax()); + debug_assert_eq!(dividend_hi, regs::rdx()); + debug_assert_eq!(dst_quotient, regs::rax()); + debug_assert_eq!(dst_remainder, regs::rdx()); // Generates the following code sequence: // @@ -504,18 +521,16 @@ pub(crate) fn emit( // $done: // Check if the divisor is zero, first. - let inst = Inst::cmp_rmi_r(*size, RegMemImm::imm(0), divisor.to_reg().to_reg()); - inst.emit(sink, info, state); + let inst = Inst::cmp_rmi_r(*size, RegMemImm::imm(0), divisor); + inst.emit(&[], sink, info, state); let inst = Inst::trap_if(CC::Z, TrapCode::IntegerDivisionByZero); - inst.emit(sink, info, state); + inst.emit(&[], sink, info, state); let (do_op, done_label) = if kind.is_signed() { // Now check if the divisor is -1. - let inst = - Inst::cmp_rmi_r(*size, RegMemImm::imm(0xffffffff), divisor.to_reg().to_reg()); - inst.emit(sink, info, state); - + let inst = Inst::cmp_rmi_r(*size, RegMemImm::imm(0xffffffff), divisor); + inst.emit(&[], sink, info, state); let do_op = sink.get_label(); // If not equal, jump to do-op. @@ -527,10 +542,10 @@ pub(crate) fn emit( let done_label = sink.get_label(); let inst = Inst::imm(OperandSize::Size64, 0, Writable::from_reg(regs::rdx())); - inst.emit(sink, info, state); + inst.emit(&[], sink, info, state); let inst = Inst::jmp_known(done_label); - inst.emit(sink, info, state); + inst.emit(&[], sink, info, state); (Some(do_op), Some(done_label)) } else { @@ -541,24 +556,21 @@ pub(crate) fn emit( let inst = Inst::imm( OperandSize::Size64, 0x8000000000000000, - tmp.to_writable_reg(), + Writable::from_reg(tmp), ); - inst.emit(sink, info, state); + inst.emit(&[], sink, info, state); - let inst = Inst::cmp_rmi_r( - OperandSize::Size64, - RegMemImm::reg(tmp.to_reg().to_reg()), - regs::rax(), - ); - inst.emit(sink, info, state); + let inst = + Inst::cmp_rmi_r(OperandSize::Size64, RegMemImm::reg(tmp), regs::rax()); + inst.emit(&[], sink, info, state); } else { let inst = Inst::cmp_rmi_r(*size, RegMemImm::imm(0x80000000), regs::rax()); - inst.emit(sink, info, state); + inst.emit(&[], sink, info, state); } // If not equal, jump over the trap. let inst = Inst::trap_if(CC::Z, TrapCode::IntegerOverflow); - inst.emit(sink, info, state); + inst.emit(&[], sink, info, state); (Some(do_op), None) } @@ -574,19 +586,15 @@ pub(crate) fn emit( if kind.is_signed() { // sign-extend the sign-bit of rax into rdx, for signed opcodes. let inst = Inst::sign_extend_data(*size); - inst.emit(sink, info, state); + inst.emit(&[], sink, info, state); } else { // zero for unsigned opcodes. let inst = Inst::imm(OperandSize::Size64, 0, Writable::from_reg(regs::rdx())); - inst.emit(sink, info, state); + inst.emit(&[], sink, info, state); } - let inst = Inst::div( - *size, - kind.is_signed(), - RegMem::reg(divisor.to_reg().to_reg()), - ); - inst.emit(sink, info, state); + let inst = Inst::div(*size, kind.is_signed(), RegMem::reg(divisor)); + inst.emit(&[], sink, info, state); // Lowering takes care of moving the result back into the right register, see comment // there. @@ -601,7 +609,8 @@ pub(crate) fn emit( simm64, dst, } => { - let enc_dst = int_reg_enc(dst.to_reg()); + let dst = allocs.next(dst.to_reg().to_reg()); + let enc_dst = int_reg_enc(dst); if *dst_size == OperandSize::Size64 { if low32_will_sign_extend_to_64(*simm64) { // Sign-extended move imm32. 
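
A note on the pattern running through the hunks above: with regalloc2, `emit` no longer reads physical registers out of the instruction fields. Each operand is resolved through `allocs.next(...)`, and the calls must occur in exactly the order the operands were reported to the allocator — which is why the `Div` and `CheckedDivOrRemSeq` arms pull `dividend_lo`, `dividend_hi`, and the destination registers up front before matching on the divisor. Synthesized sub-instructions are emitted with an empty slice (`inst.emit(&[], sink, info, state)`) because their operands are already physical. A minimal sketch of that consumer, with hypothetical `Reg`/`Allocs` types standing in for the crate's real ones:

    // Hypothetical stand-ins for illustration only; not the crate's actual types.
    #[derive(Copy, Clone, Debug, PartialEq)]
    struct Reg(u8);

    struct Allocs<'a> {
        allocs: &'a [Reg], // physical registers chosen by regalloc2, in operand order
        pos: usize,
    }

    impl<'a> Allocs<'a> {
        fn new(allocs: &'a [Reg]) -> Self {
            Allocs { allocs, pos: 0 }
        }

        // Resolve the next operand. An empty slice means "operands are already
        // physical" -- the case for the `inst.emit(&[], ...)` sub-emissions above --
        // so the pre-regalloc register is returned unchanged.
        fn next(&mut self, pre_regalloc: Reg) -> Reg {
            if self.allocs.is_empty() {
                pre_regalloc
            } else {
                let r = self.allocs[self.pos];
                self.pos += 1;
                r
            }
        }
    }

    fn main() {
        let rax = Reg(0);
        let v0 = Reg(128); // a virtual register before allocation
        let mut a = Allocs::new(&[rax]);
        assert_eq!(a.next(v0), rax); // consumed in declaration order
        let mut sub = Allocs::new(&[]);
        assert_eq!(sub.next(rax), rax); // empty slice: identity mapping
    }
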
@@ -630,18 +639,21 @@ pub(crate) fn emit( } Inst::MovRR { size, src, dst } => { + let src = allocs.next(src.to_reg()); + let dst = allocs.next(dst.to_reg().to_reg()); emit_std_reg_reg( sink, LegacyPrefixes::None, 0x89, 1, - src.to_reg(), - dst.to_reg().to_reg(), + src, + dst, RexFlags::from(*size), ); } Inst::MovzxRmR { ext_mode, src, dst } => { + let dst = allocs.next(dst.to_reg().to_reg()); let (opcodes, num_opcodes, mut rex_flags) = match ext_mode { ExtMode::BL => { // MOVZBL is (REX.W==0) 0F B6 /r @@ -675,6 +687,7 @@ pub(crate) fn emit( match src.clone().to_reg_mem() { RegMem::Reg { reg: src } => { + let src = allocs.next(src); match ext_mode { ExtMode::BL | ExtMode::BQ => { // A redundant REX prefix must be emitted for certain register inputs. @@ -687,14 +700,14 @@ pub(crate) fn emit( LegacyPrefixes::None, opcodes, num_opcodes, - dst.to_reg().to_reg(), + dst, src, rex_flags, ) } RegMem::Mem { addr: src } => { - let src = &src.finalize(state, sink); + let src = &src.finalize(state, sink).with_allocs(allocs); emit_std_reg_mem( sink, @@ -703,7 +716,7 @@ pub(crate) fn emit( LegacyPrefixes::None, opcodes, num_opcodes, - dst.to_reg().to_reg(), + dst, src, rex_flags, ) @@ -712,7 +725,8 @@ pub(crate) fn emit( } Inst::Mov64MR { src, dst } => { - let src = &src.finalize(state, sink); + let dst = allocs.next(dst.to_reg().to_reg()); + let src = &src.finalize(state, sink).with_allocs(allocs); emit_std_reg_mem( sink, @@ -721,14 +735,15 @@ pub(crate) fn emit( LegacyPrefixes::None, 0x8B, 1, - dst.to_reg().to_reg(), + dst, src, RexFlags::set_w(), ) } Inst::LoadEffectiveAddress { addr, dst } => { - let amode = addr.finalize(state, sink); + let dst = allocs.next(dst.to_reg().to_reg()); + let amode = addr.finalize(state, sink).with_allocs(allocs); emit_std_reg_mem( sink, @@ -737,13 +752,14 @@ pub(crate) fn emit( LegacyPrefixes::None, 0x8D, 1, - dst.to_reg().to_reg(), + dst, &amode, RexFlags::set_w(), ); } Inst::MovsxRmR { ext_mode, src, dst } => { + let dst = allocs.next(dst.to_reg().to_reg()); let (opcodes, num_opcodes, mut rex_flags) = match ext_mode { ExtMode::BL => { // MOVSBL is (REX.W==0) 0F BE /r @@ -769,6 +785,7 @@ pub(crate) fn emit( match src.clone().to_reg_mem() { RegMem::Reg { reg: src } => { + let src = allocs.next(src); match ext_mode { ExtMode::BL | ExtMode::BQ => { // A redundant REX prefix must be emitted for certain register inputs. @@ -781,14 +798,14 @@ pub(crate) fn emit( LegacyPrefixes::None, opcodes, num_opcodes, - dst.to_reg().to_reg(), + dst, src, rex_flags, ) } RegMem::Mem { addr: src } => { - let src = &src.finalize(state, sink); + let src = &src.finalize(state, sink).with_allocs(allocs); emit_std_reg_mem( sink, @@ -797,7 +814,7 @@ pub(crate) fn emit( LegacyPrefixes::None, opcodes, num_opcodes, - dst.to_reg().to_reg(), + dst, src, rex_flags, ) @@ -806,7 +823,8 @@ pub(crate) fn emit( } Inst::MovRM { size, src, dst } => { - let dst = &dst.finalize(state, sink); + let src = allocs.next(src.to_reg()); + let dst = &dst.finalize(state, sink).with_allocs(allocs); let prefix = match size { OperandSize::Size16 => LegacyPrefixes::_66, @@ -821,13 +839,13 @@ pub(crate) fn emit( // This is one of the few places where the presence of a // redundant REX prefix changes the meaning of the // instruction. 
- let rex = RexFlags::from((*size, src.to_reg())); + let rex = RexFlags::from((*size, src)); // 8-bit: MOV r8, r/m8 is (REX.W==0) 88 /r // 16-bit: MOV r16, r/m16 is 66 (REX.W==0) 89 /r // 32-bit: MOV r32, r/m32 is (REX.W==0) 89 /r // 64-bit: MOV r64, r/m64 is (REX.W==1) 89 /r - emit_std_reg_mem(sink, state, info, prefix, opcode, 1, src.to_reg(), dst, rex); + emit_std_reg_mem(sink, state, info, prefix, opcode, 1, src, dst, rex); } Inst::ShiftR { @@ -837,7 +855,9 @@ pub(crate) fn emit( num_bits, dst, } => { - debug_assert_eq!(*src, dst.to_reg()); + let src = allocs.next(src.to_reg()); + let dst = allocs.next(dst.to_reg().to_reg()); + debug_assert_eq!(src, dst); let subopcode = match kind { ShiftKind::RotateLeft => 0, ShiftKind::RotateRight => 1, @@ -845,10 +865,11 @@ pub(crate) fn emit( ShiftKind::ShiftRightLogical => 5, ShiftKind::ShiftRightArithmetic => 7, }; - let enc_dst = int_reg_enc(dst.to_reg()); - let rex_flags = RexFlags::from((*size, dst.to_reg().to_reg())); + let enc_dst = int_reg_enc(dst); + let rex_flags = RexFlags::from((*size, dst)); match num_bits.clone().to_imm8_reg() { Imm8Reg::Reg { reg } => { + let reg = allocs.next(reg); debug_assert_eq!(reg, regs::rcx()); let (opcode, prefix) = match size { OperandSize::Size8 => (0xD2, LegacyPrefixes::None), @@ -890,7 +911,9 @@ pub(crate) fn emit( src2, dst, } => { - debug_assert_eq!(*src1, dst.to_reg()); + let src1 = allocs.next(src1.to_reg()); + let dst = allocs.next(dst.to_reg().to_reg()); + debug_assert_eq!(src1, dst); let rex = RexFlags::clear_w(); let prefix = LegacyPrefixes::_66; let src2 = src2.clone().to_reg_mem_imm(); @@ -906,7 +929,7 @@ pub(crate) fn emit( SseOpcode::Psrlq => (0x0F73, 2), _ => panic!("invalid opcode: {}", opcode), }; - let dst_enc = reg_enc(dst.to_reg()); + let dst_enc = reg_enc(dst); emit_std_enc_enc(sink, prefix, opcode_bytes, 2, reg_digit, dst_enc, rex); let imm = (simm32) .try_into() @@ -927,18 +950,11 @@ pub(crate) fn emit( match src2 { RegMemImm::Reg { reg } => { - emit_std_reg_reg( - sink, - prefix, - opcode_bytes, - 2, - dst.to_reg().to_reg(), - reg, - rex, - ); + let reg = allocs.next(reg); + emit_std_reg_reg(sink, prefix, opcode_bytes, 2, dst, reg, rex); } RegMemImm::Mem { addr } => { - let addr = &addr.finalize(state, sink); + let addr = &addr.finalize(state, sink).with_allocs(allocs); emit_std_reg_mem( sink, state, @@ -946,7 +962,7 @@ pub(crate) fn emit( prefix, opcode_bytes, 2, - dst.to_reg().to_reg(), + dst, addr, rex, ); @@ -962,6 +978,8 @@ pub(crate) fn emit( dst: reg_g, opcode, } => { + let reg_g = allocs.next(reg_g.to_reg()); + let is_cmp = match opcode { CmpOpcode::Cmp => true, CmpOpcode::Test => false, @@ -972,10 +990,11 @@ pub(crate) fn emit( prefix = LegacyPrefixes::_66; } // A redundant REX prefix can change the meaning of this instruction. - let mut rex = RexFlags::from((*size, reg_g.to_reg())); + let mut rex = RexFlags::from((*size, reg_g)); match src_e.clone().to_reg_mem_imm() { RegMemImm::Reg { reg: reg_e } => { + let reg_e = allocs.next(reg_e); if *size == OperandSize::Size8 { // Check whether the E register forces the use of a redundant REX. 
rex.always_emit_if_8bit_needed(reg_e); @@ -989,11 +1008,11 @@ pub(crate) fn emit( (OperandSize::Size8, false) => 0x84, (_, false) => 0x85, }; - emit_std_reg_reg(sink, prefix, opcode, 1, reg_e, reg_g.to_reg(), rex); + emit_std_reg_reg(sink, prefix, opcode, 1, reg_e, reg_g, rex); } RegMemImm::Mem { addr } => { - let addr = &addr.finalize(state, sink); + let addr = &addr.finalize(state, sink).with_allocs(allocs); // Whereas here we revert to the "normal" G-E ordering for CMP. let opcode = match (*size, is_cmp) { (OperandSize::Size8, true) => 0x3A, @@ -1001,17 +1020,7 @@ pub(crate) fn emit( (OperandSize::Size8, false) => 0x84, (_, false) => 0x85, }; - emit_std_reg_mem( - sink, - state, - info, - prefix, - opcode, - 1, - reg_g.to_reg(), - addr, - rex, - ); + emit_std_reg_mem(sink, state, info, prefix, opcode, 1, reg_g, addr, rex); } RegMemImm::Imm { simm32 } => { @@ -1037,7 +1046,7 @@ pub(crate) fn emit( }; let subopcode = if is_cmp { 7 } else { 0 }; - let enc_g = int_reg_enc(reg_g.to_reg()); + let enc_g = int_reg_enc(reg_g); emit_std_enc_enc(sink, prefix, opcode, 1, subopcode, enc_g, rex); emit_simm(sink, if use_imm8 { 1 } else { size.to_bytes() }, simm32); } @@ -1045,6 +1054,7 @@ pub(crate) fn emit( } Inst::Setcc { cc, dst } => { + let dst = allocs.next(dst.to_reg().to_reg()); let opcode = 0x0f90 + cc.get_enc() as u32; let mut rex_flags = RexFlags::clear_w(); rex_flags.always_emit(); @@ -1054,7 +1064,7 @@ pub(crate) fn emit( opcode, 2, 0, - reg_enc(dst.to_reg()), + reg_enc(dst), rex_flags, ); } @@ -1066,7 +1076,9 @@ pub(crate) fn emit( alternative, dst, } => { - debug_assert_eq!(*alternative, dst.to_reg()); + let alternative = allocs.next(alternative.to_reg()); + let dst = allocs.next(dst.to_reg().to_reg()); + debug_assert_eq!(alternative, dst); let rex_flags = RexFlags::from(*size); let prefix = match size { OperandSize::Size16 => LegacyPrefixes::_66, @@ -1077,29 +1089,12 @@ pub(crate) fn emit( let opcode = 0x0F40 + cc.get_enc() as u32; match consequent.clone().to_reg_mem() { RegMem::Reg { reg } => { - emit_std_reg_reg( - sink, - prefix, - opcode, - 2, - dst.to_reg().to_reg(), - reg, - rex_flags, - ); + let reg = allocs.next(reg); + emit_std_reg_reg(sink, prefix, opcode, 2, dst, reg, rex_flags); } RegMem::Mem { addr } => { - let addr = &addr.finalize(state, sink); - emit_std_reg_mem( - sink, - state, - info, - prefix, - opcode, - 2, - dst.to_reg().to_reg(), - addr, - rex_flags, - ); + let addr = &addr.finalize(state, sink).with_allocs(allocs); + emit_std_reg_mem(sink, state, info, prefix, opcode, 2, dst, addr, rex_flags); } } } @@ -1111,7 +1106,10 @@ pub(crate) fn emit( alternative, dst, } => { - debug_assert_eq!(*alternative, dst.to_reg()); + let alternative = allocs.next(alternative.to_reg()); + let dst = allocs.next(dst.to_reg().to_reg()); + debug_assert_eq!(alternative, dst); + let consequent = consequent.clone().to_reg_mem().with_allocs(allocs); // Lowering of the Select IR opcode when the input is an fcmp relies on the fact that // this doesn't clobber flags. Make sure to not do so here. 
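
The `debug_assert_eq!(src, dst)`-style checks above (in the `Not`, `Neg`, `ShiftR`, `Cmove`, and ALU arms) guard x86's two-address forms: the instruction overwrites its first source, so lowering reports that source as tied to the def, and emission merely verifies that regalloc2 honored the constraint. A rough sketch under assumed names — the real regalloc2 operand encoding differs in detail:

    #[derive(Copy, Clone, Debug, PartialEq)]
    struct Reg(u8);

    // Simplified operand descriptions, loosely modeled on regalloc2's;
    // the real crate encodes these constraints more compactly.
    enum Operand {
        Def(Reg),
        Use(Reg),
        // This use must receive the same physical register as operand `tied_to`.
        TiedUse { vreg: Reg, tied_to: usize },
    }

    // A two-address ALU op (dst := src1 OP src2) would describe itself like
    // this, forcing the allocator to put src1 and dst in one register.
    fn alu_rmi_r_operands(src1: Reg, src2: Reg, dst: Reg) -> Vec<Operand> {
        vec![
            Operand::Def(dst),
            Operand::TiedUse { vreg: src1, tied_to: 0 },
            Operand::Use(src2),
        ]
    }

    fn main() {
        let (v1, v2, v3) = (Reg(128), Reg(129), Reg(130));
        let ops = alu_rmi_r_operands(v1, v2, v3);
        // The second operand is the tied use pointing back at the def.
        match &ops[1] {
            Operand::TiedUse { vreg, tied_to } => {
                assert_eq!(*vreg, v1);
                assert_eq!(*tied_to, 0);
            }
            _ => unreachable!(),
        }
    }

This tie is also why the disassembly expectations in the test hunks further down gain a third operand (e.g. `addq %rdx, %r15, %rdx`): the pretty-printer now shows the tied source, the second source, and the destination separately, even though the machine encoding has only two operands.
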
@@ -1125,19 +1123,20 @@ pub(crate) fn emit( } else { SseOpcode::Movss }; - let inst = - Inst::xmm_unary_rm_r(op, consequent.clone().to_reg_mem(), dst.to_writable_reg()); - inst.emit(sink, info, state); + let inst = Inst::xmm_unary_rm_r(op, consequent, Writable::from_reg(dst)); + inst.emit(&[], sink, info, state); sink.bind_label(next); } Inst::Push64 { src } => { + let src = src.clone().to_reg_mem_imm().with_allocs(allocs); + if info.flags.enable_probestack() { sink.add_trap(state.cur_srcloc(), TrapCode::StackOverflow); } - match src.clone().to_reg_mem_imm() { + match src { RegMemImm::Reg { reg } => { let enc_reg = int_reg_enc(reg); let rex = 0x40 | ((enc_reg >> 3) & 1); @@ -1175,7 +1174,8 @@ pub(crate) fn emit( } Inst::Pop64 { dst } => { - let enc_dst = int_reg_enc(dst.to_reg()); + let dst = allocs.next(dst.to_reg().to_reg()); + let enc_dst = int_reg_enc(dst); if enc_dst >= 8 { // 0x41 == REX.{W=0, B=1}. It seems that REX.W is irrelevant here. sink.put1(0x41); @@ -1202,13 +1202,15 @@ pub(crate) fn emit( } Inst::CallUnknown { dest, opcode, .. } => { + let dest = dest.with_allocs(allocs); + if info.flags.enable_probestack() { sink.add_trap(state.cur_srcloc(), TrapCode::StackOverflow); } let start_offset = sink.cur_offset(); match dest { RegMem::Reg { reg } => { - let reg_enc = int_reg_enc(*reg); + let reg_enc = int_reg_enc(reg); emit_std_enc_enc( sink, LegacyPrefixes::None, @@ -1244,7 +1246,7 @@ pub(crate) fn emit( } } - Inst::Ret {} => sink.put1(0xC3), + Inst::Ret { .. } => sink.put1(0xC3), Inst::JmpKnown { dst } => { let br_start = sink.cur_offset(); @@ -1305,9 +1307,11 @@ pub(crate) fn emit( } Inst::JmpUnknown { target } => { + let target = target.with_allocs(allocs); + match target { RegMem::Reg { reg } => { - let reg_enc = int_reg_enc(*reg); + let reg_enc = int_reg_enc(reg); emit_std_enc_enc( sink, LegacyPrefixes::None, @@ -1344,6 +1348,10 @@ pub(crate) fn emit( default_target, .. } => { + let idx = allocs.next(*idx); + let tmp1 = Writable::from_reg(allocs.next(tmp1.to_reg())); + let tmp2 = Writable::from_reg(allocs.next(tmp2.to_reg())); + // This sequence is *one* instruction in the vcode, and is expanded only here at // emission time, because we cannot allow the regalloc to insert spills/reloads in // the middle; we depend on hardcoded PC-rel addressing below. @@ -1368,13 +1376,13 @@ pub(crate) fn emit( one_way_jmp(sink, CC::NB, *default_target); // idx unsigned >= jmp table size // Copy the index (and make sure to clear the high 32-bits lane of tmp2). - let inst = Inst::movzx_rm_r(ExtMode::LQ, RegMem::reg(*idx), *tmp2); - inst.emit(sink, info, state); + let inst = Inst::movzx_rm_r(ExtMode::LQ, RegMem::reg(idx), tmp2); + inst.emit(&[], sink, info, state); // Load base address of jump table. let start_of_jumptable = sink.get_label(); - let inst = Inst::lea(Amode::rip_relative(start_of_jumptable), *tmp1); - inst.emit(sink, info, state); + let inst = Inst::lea(Amode::rip_relative(start_of_jumptable), tmp1); + inst.emit(&[], sink, info, state); // Load value out of the jump table. It's a relative offset to the target block, so it // might be negative; use a sign-extension. @@ -1386,22 +1394,22 @@ pub(crate) fn emit( Gpr::new(tmp2.to_reg()).unwrap(), 2, )), - *tmp2, + tmp2, ); - inst.emit(sink, info, state); + inst.emit(&[], sink, info, state); // Add base of jump table to jump-table-sourced block offset. 
let inst = Inst::alu_rmi_r( OperandSize::Size64, AluRmiROpcode::Add, RegMemImm::reg(tmp2.to_reg()), - *tmp1, + tmp1, ); - inst.emit(sink, info, state); + inst.emit(&[], sink, info, state); // Branch to computed address. let inst = Inst::jmp_unknown(RegMem::reg(tmp1.to_reg())); - inst.emit(sink, info, state); + inst.emit(&[], sink, info, state); // Emit jump table (table of 32-bit offsets). sink.bind_label(start_of_jumptable); @@ -1426,7 +1434,7 @@ pub(crate) fn emit( // Trap! let inst = Inst::trap(*trap_code); - inst.emit(sink, info, state); + inst.emit(&[], sink, info, state); sink.bind_label(else_label); } @@ -1436,6 +1444,9 @@ pub(crate) fn emit( src: src_e, dst: reg_g, } => { + let reg_g = allocs.next(reg_g.to_reg().to_reg()); + let src_e = src_e.clone().to_reg_mem().with_allocs(allocs); + let rex = RexFlags::clear_w(); let (prefix, opcode, num_opcodes) = match op { @@ -1474,17 +1485,9 @@ pub(crate) fn emit( _ => unimplemented!("Opcode {:?} not implemented", op), }; - match src_e.clone().to_reg_mem() { + match src_e { RegMem::Reg { reg: reg_e } => { - emit_std_reg_reg( - sink, - prefix, - opcode, - num_opcodes, - reg_g.to_reg().to_reg(), - reg_e, - rex, - ); + emit_std_reg_reg(sink, prefix, opcode, num_opcodes, reg_g, reg_e, rex); } RegMem::Mem { addr } => { let addr = &addr.finalize(state, sink); @@ -1495,7 +1498,7 @@ pub(crate) fn emit( prefix, opcode, num_opcodes, - reg_g.to_reg().to_reg(), + reg_g, addr, rex, ); @@ -1504,21 +1507,24 @@ pub(crate) fn emit( } Inst::XmmUnaryRmREvex { op, src, dst } => { + let dst = allocs.next(dst.to_reg().to_reg()); + let src = src.clone().to_reg_mem().with_allocs(allocs); + let (prefix, map, w, opcode) = match op { Avx512Opcode::Vcvtudq2ps => (LegacyPrefixes::_F2, OpcodeMap::_0F, false, 0x7a), Avx512Opcode::Vpabsq => (LegacyPrefixes::_66, OpcodeMap::_0F38, true, 0x1f), Avx512Opcode::Vpopcntb => (LegacyPrefixes::_66, OpcodeMap::_0F38, false, 0x54), _ => unimplemented!("Opcode {:?} not implemented", op), }; - match src.clone().to_reg_mem() { + match src { RegMem::Reg { reg: src } => EvexInstruction::new() .length(EvexVectorLength::V128) .prefix(prefix) .map(map) .w(w) .opcode(opcode) - .reg(dst.to_reg().get_hw_encoding()) - .rm(src.get_hw_encoding()) + .reg(dst.to_real_reg().unwrap().hw_enc()) + .rm(src.to_real_reg().unwrap().hw_enc()) .encode(sink), _ => todo!(), }; @@ -1530,7 +1536,17 @@ pub(crate) fn emit( src2: src_e, dst: reg_g, } => { - debug_assert_eq!(*src1, reg_g.to_reg()); + let (src_e, reg_g) = if inst.produces_const() { + let reg_g = allocs.next(reg_g.to_reg().to_reg()); + (RegMem::Reg { reg: reg_g }, reg_g) + } else { + let src1 = allocs.next(src1.to_reg()); + let reg_g = allocs.next(reg_g.to_reg().to_reg()); + let src_e = src_e.clone().to_reg_mem().with_allocs(allocs); + debug_assert_eq!(src1, reg_g); + (src_e, reg_g) + }; + let rex = RexFlags::clear_w(); let (prefix, opcode, length) = match op { SseOpcode::Addps => (LegacyPrefixes::None, 0x0F58, 2), @@ -1637,31 +1653,13 @@ pub(crate) fn emit( _ => unimplemented!("Opcode {:?} not implemented", op), }; - match src_e.clone().to_reg_mem() { + match src_e { RegMem::Reg { reg: reg_e } => { - emit_std_reg_reg( - sink, - prefix, - opcode, - length, - reg_g.to_reg().to_reg(), - reg_e, - rex, - ); + emit_std_reg_reg(sink, prefix, opcode, length, reg_g, reg_e, rex); } RegMem::Mem { addr } => { let addr = &addr.finalize(state, sink); - emit_std_reg_mem( - sink, - state, - info, - prefix, - opcode, - length, - reg_g.to_reg().to_reg(), - addr, - rex, - ); + emit_std_reg_mem(sink, state, info, prefix, 
opcode, length, reg_g, addr, rex); } } } @@ -1672,21 +1670,25 @@ pub(crate) fn emit( src2, dst, } => { + let dst = allocs.next(dst.to_reg().to_reg()); + let src2 = allocs.next(src2.to_reg()); + let src1 = src1.clone().to_reg_mem().with_allocs(allocs); + let (w, opcode) = match op { Avx512Opcode::Vpermi2b => (false, 0x75), Avx512Opcode::Vpmullq => (true, 0x40), _ => unimplemented!("Opcode {:?} not implemented", op), }; - match src1.clone().to_reg_mem() { + match src1 { RegMem::Reg { reg: src } => EvexInstruction::new() .length(EvexVectorLength::V128) .prefix(LegacyPrefixes::_66) .map(OpcodeMap::_0F38) .w(w) .opcode(opcode) - .reg(dst.to_reg().get_hw_encoding()) - .rm(src.get_hw_encoding()) - .vvvvv(src2.get_hw_encoding()) + .reg(dst.to_real_reg().unwrap().hw_enc()) + .rm(src.to_real_reg().unwrap().hw_enc()) + .vvvvv(src2.to_real_reg().unwrap().hw_enc()) .encode(sink), _ => todo!(), }; @@ -1699,7 +1701,10 @@ pub(crate) fn emit( rhs, dst, } => { - debug_assert_eq!(*rhs, dst.to_reg()); + let rhs = allocs.next(rhs.to_reg()); + let lhs = allocs.next(lhs.to_reg()); + let dst = allocs.next(dst.to_reg().to_reg()); + debug_assert_eq!(rhs, dst); // Generates the following sequence: // cmpss/cmpsd %lhs, %rhs_dst @@ -1750,8 +1755,8 @@ pub(crate) fn emit( _ => unreachable!(), }; - let inst = Inst::xmm_cmp_rm_r(cmp_op, RegMem::reg(lhs.to_reg()), dst.to_reg().to_reg()); - inst.emit(sink, info, state); + let inst = Inst::xmm_cmp_rm_r(cmp_op, RegMem::reg(lhs), dst); + inst.emit(&[], sink, info, state); one_way_jmp(sink, CC::NZ, do_min_max); one_way_jmp(sink, CC::P, propagate_nan); @@ -1760,24 +1765,25 @@ pub(crate) fn emit( // and negative zero. These instructions merge the sign bits in that // case, and are no-ops otherwise. let op = if *is_min { or_op } else { and_op }; - let inst = Inst::xmm_rm_r(op, RegMem::reg(lhs.to_reg()), dst.to_writable_reg()); - inst.emit(sink, info, state); + let inst = Inst::xmm_rm_r(op, RegMem::reg(lhs), Writable::from_reg(dst)); + inst.emit(&[], sink, info, state); let inst = Inst::jmp_known(done); - inst.emit(sink, info, state); + inst.emit(&[], sink, info, state); // x86's min/max are not symmetric; if either operand is a NaN, they return the // read-only operand: perform an addition between the two operands, which has the // desired NaN propagation effects. 
sink.bind_label(propagate_nan); - let inst = Inst::xmm_rm_r(add_op, RegMem::reg(lhs.to_reg()), dst.to_writable_reg()); - inst.emit(sink, info, state); + let inst = Inst::xmm_rm_r(add_op, RegMem::reg(lhs), Writable::from_reg(dst)); + inst.emit(&[], sink, info, state); one_way_jmp(sink, CC::P, done); sink.bind_label(do_min_max); - let inst = Inst::xmm_rm_r(min_max_op, RegMem::reg(lhs.to_reg()), dst.to_writable_reg()); - inst.emit(sink, info, state); + + let inst = Inst::xmm_rm_r(min_max_op, RegMem::reg(lhs), Writable::from_reg(dst)); + inst.emit(&[], sink, info, state); sink.bind_label(done); } @@ -1790,7 +1796,21 @@ pub(crate) fn emit( imm, size, } => { - debug_assert_eq!(*src1, dst.to_reg()); + let (src2, dst) = if inst.produces_const() { + let dst = allocs.next(dst.to_reg()); + (RegMem::Reg { reg: dst }, dst) + } else if !op.uses_src1() { + let dst = allocs.next(dst.to_reg()); + let src2 = src2.with_allocs(allocs); + (src2, dst) + } else { + let src1 = allocs.next(*src1); + let dst = allocs.next(dst.to_reg()); + let src2 = src2.with_allocs(allocs); + debug_assert_eq!(src1, dst); + (src2, dst) + }; + let (prefix, opcode, len) = match op { SseOpcode::Cmpps => (LegacyPrefixes::None, 0x0FC2, 2), SseOpcode::Cmppd => (LegacyPrefixes::_66, 0x0FC2, 2), @@ -1824,9 +1844,9 @@ pub(crate) fn emit( match src2 { RegMem::Reg { reg } => { if regs_swapped { - emit_std_reg_reg(sink, prefix, opcode, len, *reg, dst.to_reg(), rex); + emit_std_reg_reg(sink, prefix, opcode, len, reg, dst, rex); } else { - emit_std_reg_reg(sink, prefix, opcode, len, dst.to_reg(), *reg, rex); + emit_std_reg_reg(sink, prefix, opcode, len, dst, reg, rex); } } RegMem::Mem { addr } => { @@ -1835,26 +1855,17 @@ pub(crate) fn emit( !regs_swapped, "No existing way to encode a mem argument in the ModRM r/m field." ); - emit_std_reg_mem( - sink, - state, - info, - prefix, - opcode, - len, - dst.to_reg(), - addr, - rex, - ); + emit_std_reg_mem(sink, state, info, prefix, opcode, len, dst, addr, rex); } } sink.put1(*imm); } Inst::XmmLoadConst { src, dst, ty } => { + let dst = allocs.next(dst.to_reg()); let load_offset = Amode::rip_relative(sink.get_label_for_constant(*src)); - let load = Inst::load(*ty, load_offset, *dst, ExtKind::None); - load.emit(sink, info, state); + let load = Inst::load(*ty, load_offset, Writable::from_reg(dst), ExtKind::None); + load.emit(&[], sink, info, state); } Inst::XmmUninitializedValue { .. 
} => { @@ -1863,6 +1874,9 @@ pub(crate) fn emit( } Inst::XmmMovRM { op, src, dst } => { + let src = allocs.next(*src); + let dst = dst.with_allocs(allocs); + let (prefix, opcode) = match op { SseOpcode::Movaps => (LegacyPrefixes::None, 0x0F29), SseOpcode::Movapd => (LegacyPrefixes::_66, 0x0F29), @@ -1881,7 +1895,7 @@ pub(crate) fn emit( prefix, opcode, 2, - *src, + src, dst, RexFlags::clear_w(), ); @@ -1893,6 +1907,9 @@ pub(crate) fn emit( dst, dst_size, } => { + let src = allocs.next(src.to_reg()); + let dst = allocs.next(dst.to_reg().to_reg()); + let (prefix, opcode, dst_first) = match op { SseOpcode::Cvttss2si => (LegacyPrefixes::_F3, 0x0F2C, true), SseOpcode::Cvttsd2si => (LegacyPrefixes::_F2, 0x0F2C, true), @@ -1905,11 +1922,7 @@ pub(crate) fn emit( _ => panic!("unexpected opcode {:?}", op), }; let rex = RexFlags::from(*dst_size); - let (src, dst) = if dst_first { - (dst.to_reg().to_reg(), src.to_reg()) - } else { - (src.to_reg(), dst.to_reg().to_reg()) - }; + let (src, dst) = if dst_first { (dst, src) } else { (src, dst) }; emit_std_reg_reg(sink, prefix, opcode, 2, src, dst, rex); } @@ -1920,6 +1933,9 @@ pub(crate) fn emit( dst: reg_g, src_size, } => { + let reg_g = allocs.next(reg_g.to_reg().to_reg()); + let src_e = src_e.clone().to_reg_mem().with_allocs(allocs); + let (prefix, opcode) = match op { // Movd and movq use the same opcode; the presence of the REX prefix (set below) // actually determines which is used. @@ -1929,28 +1945,21 @@ pub(crate) fn emit( _ => panic!("unexpected opcode {:?}", op), }; let rex = RexFlags::from(*src_size); - match src_e.clone().to_reg_mem() { + match src_e { RegMem::Reg { reg: reg_e } => { - emit_std_reg_reg(sink, prefix, opcode, 2, reg_g.to_reg().to_reg(), reg_e, rex); + emit_std_reg_reg(sink, prefix, opcode, 2, reg_g, reg_e, rex); } RegMem::Mem { addr } => { let addr = &addr.finalize(state, sink); - emit_std_reg_mem( - sink, - state, - info, - prefix, - opcode, - 2, - reg_g.to_reg().to_reg(), - addr, - rex, - ); + emit_std_reg_mem(sink, state, info, prefix, opcode, 2, reg_g, addr, rex); } } } Inst::XmmCmpRmR { op, src, dst } => { + let dst = allocs.next(dst.to_reg()); + let src = src.clone().to_reg_mem().with_allocs(allocs); + let rex = RexFlags::clear_w(); let (prefix, opcode, len) = match op { SseOpcode::Ptest => (LegacyPrefixes::_66, 0x0F3817, 3), @@ -1959,23 +1968,13 @@ pub(crate) fn emit( _ => unimplemented!("Emit xmm cmp rm r"), }; - match src.clone().to_reg_mem() { + match src { RegMem::Reg { reg } => { - emit_std_reg_reg(sink, prefix, opcode, len, dst.to_reg(), reg, rex); + emit_std_reg_reg(sink, prefix, opcode, len, dst, reg, rex); } RegMem::Mem { addr } => { let addr = &addr.finalize(state, sink); - emit_std_reg_mem( - sink, - state, - info, - prefix, - opcode, - len, - dst.to_reg(), - addr, - rex, - ); + emit_std_reg_mem(sink, state, info, prefix, opcode, len, dst, addr, rex); } } } @@ -1987,6 +1986,11 @@ pub(crate) fn emit( tmp_gpr1, tmp_gpr2, } => { + let src = allocs.next(src.to_reg().to_reg()); + let dst = allocs.next(dst.to_reg().to_reg()); + let tmp_gpr1 = allocs.next(tmp_gpr1.to_reg().to_reg()); + let tmp_gpr2 = allocs.next(tmp_gpr2.to_reg().to_reg()); + // Note: this sequence is specific to 64-bit mode; a 32-bit mode would require a // different sequence. // @@ -2021,12 +2025,8 @@ pub(crate) fn emit( // If x seen as a signed int64 is not negative, a signed-conversion will do the right // thing. // TODO use tst src, src here. 
- let inst = Inst::cmp_rmi_r( - OperandSize::Size64, - RegMemImm::imm(0), - src.to_reg().to_reg(), - ); - inst.emit(sink, info, state); + let inst = Inst::cmp_rmi_r(OperandSize::Size64, RegMemImm::imm(0), src); + inst.emit(&[], sink, info, state); one_way_jmp(sink, CC::L, handle_negative); @@ -2036,63 +2036,55 @@ pub(crate) fn emit( sink, info, state, - src.to_reg().to_reg(), - dst.to_writable_reg(), + src, + Writable::from_reg(dst), *dst_size == OperandSize::Size64, ); let inst = Inst::jmp_known(done); - inst.emit(sink, info, state); + inst.emit(&[], sink, info, state); sink.bind_label(handle_negative); // Divide x by two to get it in range for the signed conversion, keep the LSB, and // scale it back up on the FP side. - let inst = Inst::gen_move( - tmp_gpr1.to_writable_reg(), - src.to_reg().to_reg(), - types::I64, - ); - inst.emit(sink, info, state); + let inst = Inst::gen_move(Writable::from_reg(tmp_gpr1), src, types::I64); + inst.emit(&[], sink, info, state); // tmp_gpr1 := src >> 1 let inst = Inst::shift_r( OperandSize::Size64, ShiftKind::ShiftRightLogical, Some(1), - tmp_gpr1.to_writable_reg(), + Writable::from_reg(tmp_gpr1), ); - inst.emit(sink, info, state); + inst.emit(&[], sink, info, state); - let inst = Inst::gen_move( - tmp_gpr2.to_writable_reg(), - src.to_reg().to_reg(), - types::I64, - ); - inst.emit(sink, info, state); + let inst = Inst::gen_move(Writable::from_reg(tmp_gpr2), src, types::I64); + inst.emit(&[], sink, info, state); let inst = Inst::alu_rmi_r( OperandSize::Size64, AluRmiROpcode::And, RegMemImm::imm(1), - tmp_gpr2.to_writable_reg(), + Writable::from_reg(tmp_gpr2), ); - inst.emit(sink, info, state); + inst.emit(&[], sink, info, state); let inst = Inst::alu_rmi_r( OperandSize::Size64, AluRmiROpcode::Or, - RegMemImm::reg(tmp_gpr1.to_reg().to_reg()), - tmp_gpr2.to_writable_reg(), + RegMemImm::reg(tmp_gpr1), + Writable::from_reg(tmp_gpr2), ); - inst.emit(sink, info, state); + inst.emit(&[], sink, info, state); emit_signed_cvt( sink, info, state, - tmp_gpr2.to_reg().to_reg(), - dst.to_writable_reg(), + tmp_gpr2, + Writable::from_reg(dst), *dst_size == OperandSize::Size64, ); @@ -2101,12 +2093,8 @@ pub(crate) fn emit( } else { SseOpcode::Addss }; - let inst = Inst::xmm_rm_r( - add_op, - RegMem::reg(dst.to_reg().to_reg()), - dst.to_writable_reg(), - ); - inst.emit(sink, info, state); + let inst = Inst::xmm_rm_r(add_op, RegMem::reg(dst), Writable::from_reg(dst)); + inst.emit(&[], sink, info, state); sink.bind_label(done); } @@ -2120,6 +2108,11 @@ pub(crate) fn emit( tmp_gpr, tmp_xmm, } => { + let src = allocs.next(src.to_reg().to_reg()); + let dst = allocs.next(dst.to_reg().to_reg()); + let tmp_gpr = allocs.next(tmp_gpr.to_reg().to_reg()); + let tmp_xmm = allocs.next(tmp_xmm.to_reg().to_reg()); + // Emits the following common sequence: // // cvttss2si/cvttsd2si %src, %dst @@ -2166,8 +2159,6 @@ pub(crate) fn emit( // // done: - let src = src.to_reg(); - let (cast_op, cmp_op, trunc_op) = match src_size { OperandSize::Size64 => (SseOpcode::Movq, SseOpcode::Ucomisd, SseOpcode::Cvttsd2si), OperandSize::Size32 => (SseOpcode::Movd, SseOpcode::Ucomiss, SseOpcode::Cvttss2si), @@ -2178,19 +2169,19 @@ pub(crate) fn emit( let not_nan = sink.get_label(); // The truncation. - let inst = Inst::xmm_to_gpr(trunc_op, src.to_reg(), dst.to_writable_reg(), *dst_size); - inst.emit(sink, info, state); + let inst = Inst::xmm_to_gpr(trunc_op, src, Writable::from_reg(dst), *dst_size); + inst.emit(&[], sink, info, state); // Compare against 1, in case of overflow the dst operand was INT_MIN. 
- let inst = Inst::cmp_rmi_r(*dst_size, RegMemImm::imm(1), dst.to_reg().to_reg()); - inst.emit(sink, info, state); + let inst = Inst::cmp_rmi_r(*dst_size, RegMemImm::imm(1), dst); + inst.emit(&[], sink, info, state); one_way_jmp(sink, CC::NO, done); // no overflow => done // Check for NaN. - let inst = Inst::xmm_cmp_rm_r(cmp_op, RegMem::reg(src.to_reg()), src.to_reg()); - inst.emit(sink, info, state); + let inst = Inst::xmm_cmp_rm_r(cmp_op, RegMem::reg(src), src); + inst.emit(&[], sink, info, state); one_way_jmp(sink, CC::NP, not_nan); // go to not_nan if not a NaN @@ -2199,13 +2190,13 @@ pub(crate) fn emit( let inst = Inst::alu_rmi_r( *dst_size, AluRmiROpcode::Xor, - RegMemImm::reg(dst.to_reg().to_reg()), - dst.to_writable_reg(), + RegMemImm::reg(dst), + Writable::from_reg(dst), ); - inst.emit(sink, info, state); + inst.emit(&[], sink, info, state); let inst = Inst::jmp_known(done); - inst.emit(sink, info, state); + inst.emit(&[], sink, info, state); sink.bind_label(not_nan); @@ -2214,17 +2205,13 @@ pub(crate) fn emit( // Zero out tmp_xmm. let inst = Inst::xmm_rm_r( SseOpcode::Xorpd, - RegMem::reg(tmp_xmm.to_reg().to_reg()), - tmp_xmm.to_writable_reg(), + RegMem::reg(tmp_xmm), + Writable::from_reg(tmp_xmm), ); - inst.emit(sink, info, state); + inst.emit(&[], sink, info, state); - let inst = Inst::xmm_cmp_rm_r( - cmp_op, - RegMem::reg(src.to_reg()), - tmp_xmm.to_reg().to_reg(), - ); - inst.emit(sink, info, state); + let inst = Inst::xmm_cmp_rm_r(cmp_op, RegMem::reg(src), tmp_xmm); + inst.emit(&[], sink, info, state); // Jump if >= to done. one_way_jmp(sink, CC::NB, done); @@ -2234,18 +2221,18 @@ pub(crate) fn emit( let inst = Inst::imm( OperandSize::Size64, 0x7fffffffffffffff, - dst.to_writable_reg(), + Writable::from_reg(dst), ); - inst.emit(sink, info, state); + inst.emit(&[], sink, info, state); } else { - let inst = Inst::imm(OperandSize::Size32, 0x7fffffff, dst.to_writable_reg()); - inst.emit(sink, info, state); + let inst = Inst::imm(OperandSize::Size32, 0x7fffffff, Writable::from_reg(dst)); + inst.emit(&[], sink, info, state); } } else { let check_positive = sink.get_label(); let inst = Inst::trap(TrapCode::BadConversionToInteger); - inst.emit(sink, info, state); + inst.emit(&[], sink, info, state); // Check if INT_MIN was the correct result: determine the smallest floating point // number that would convert to INT_MIN, put it in a temporary register, and compare @@ -2261,8 +2248,8 @@ pub(crate) fn emit( OperandSize::Size32 => { let cst = Ieee32::pow2(output_bits - 1).neg().bits(); let inst = - Inst::imm(OperandSize::Size32, cst as u64, tmp_gpr.to_writable_reg()); - inst.emit(sink, info, state); + Inst::imm(OperandSize::Size32, cst as u64, Writable::from_reg(tmp_gpr)); + inst.emit(&[], sink, info, state); } OperandSize::Size64 => { // An f64 can represent `i32::min_value() - 1` exactly with precision to spare, @@ -2274,32 +2261,28 @@ pub(crate) fn emit( Ieee64::pow2(output_bits - 1).neg() }; let inst = - Inst::imm(OperandSize::Size64, cst.bits(), tmp_gpr.to_writable_reg()); - inst.emit(sink, info, state); + Inst::imm(OperandSize::Size64, cst.bits(), Writable::from_reg(tmp_gpr)); + inst.emit(&[], sink, info, state); } _ => unreachable!(), } let inst = Inst::gpr_to_xmm( cast_op, - RegMem::reg(tmp_gpr.to_reg().to_reg()), + RegMem::reg(tmp_gpr), *src_size, - tmp_xmm.to_writable_reg(), + Writable::from_reg(tmp_xmm), ); - inst.emit(sink, info, state); + inst.emit(&[], sink, info, state); - let inst = Inst::xmm_cmp_rm_r( - cmp_op, - RegMem::reg(tmp_xmm.to_reg().to_reg()), - src.to_reg(), - 
); - inst.emit(sink, info, state); + let inst = Inst::xmm_cmp_rm_r(cmp_op, RegMem::reg(tmp_xmm), src); + inst.emit(&[], sink, info, state); // jump over trap if src >= or > threshold one_way_jmp(sink, no_overflow_cc, check_positive); let inst = Inst::trap(TrapCode::IntegerOverflow); - inst.emit(sink, info, state); + inst.emit(&[], sink, info, state); // If positive, it was a real overflow. @@ -2308,22 +2291,18 @@ pub(crate) fn emit( // Zero out the tmp_xmm register. let inst = Inst::xmm_rm_r( SseOpcode::Xorpd, - RegMem::reg(tmp_xmm.to_reg().to_reg()), - tmp_xmm.to_writable_reg(), + RegMem::reg(tmp_xmm), + Writable::from_reg(tmp_xmm), ); - inst.emit(sink, info, state); + inst.emit(&[], sink, info, state); - let inst = Inst::xmm_cmp_rm_r( - cmp_op, - RegMem::reg(src.to_reg()), - tmp_xmm.to_reg().to_reg(), - ); - inst.emit(sink, info, state); + let inst = Inst::xmm_cmp_rm_r(cmp_op, RegMem::reg(src), tmp_xmm); + inst.emit(&[], sink, info, state); one_way_jmp(sink, CC::NB, done); // jump over trap if 0 >= src let inst = Inst::trap(TrapCode::IntegerOverflow); - inst.emit(sink, info, state); + inst.emit(&[], sink, info, state); } sink.bind_label(done); @@ -2338,6 +2317,11 @@ pub(crate) fn emit( tmp_gpr, tmp_xmm, } => { + let src = allocs.next(src.to_reg().to_reg()); + let dst = allocs.next(dst.to_reg().to_reg()); + let tmp_gpr = allocs.next(tmp_gpr.to_reg().to_reg()); + let tmp_xmm = allocs.next(tmp_xmm.to_reg().to_reg()); + // The only difference in behavior between saturating and non-saturating is how we // handle errors. Emits the following sequence: // @@ -2397,23 +2381,19 @@ pub(crate) fn emit( _ => unreachable!(), }; - let inst = Inst::imm(*src_size, cst, tmp_gpr.to_writable_reg()); - inst.emit(sink, info, state); + let inst = Inst::imm(*src_size, cst, Writable::from_reg(tmp_gpr)); + inst.emit(&[], sink, info, state); let inst = Inst::gpr_to_xmm( cast_op, - RegMem::reg(tmp_gpr.to_reg().to_reg()), + RegMem::reg(tmp_gpr), *src_size, - tmp_xmm.to_writable_reg(), + Writable::from_reg(tmp_xmm), ); - inst.emit(sink, info, state); + inst.emit(&[], sink, info, state); - let inst = Inst::xmm_cmp_rm_r( - cmp_op, - RegMem::reg(tmp_xmm.to_reg().to_reg()), - src.to_reg().to_reg(), - ); - inst.emit(sink, info, state); + let inst = Inst::xmm_cmp_rm_r(cmp_op, RegMem::reg(tmp_xmm), src); + inst.emit(&[], sink, info, state); let handle_large = sink.get_label(); one_way_jmp(sink, CC::NB, handle_large); // jump to handle_large if src >= large_threshold @@ -2426,17 +2406,17 @@ pub(crate) fn emit( let inst = Inst::alu_rmi_r( *dst_size, AluRmiROpcode::Xor, - RegMemImm::reg(dst.to_reg().to_reg()), - dst.to_writable_reg(), + RegMemImm::reg(dst), + Writable::from_reg(dst), ); - inst.emit(sink, info, state); + inst.emit(&[], sink, info, state); let inst = Inst::jmp_known(done); - inst.emit(sink, info, state); + inst.emit(&[], sink, info, state); } else { // Trap. let inst = Inst::trap(TrapCode::BadConversionToInteger); - inst.emit(sink, info, state); + inst.emit(&[], sink, info, state); } sink.bind_label(not_nan); @@ -2444,16 +2424,11 @@ pub(crate) fn emit( // Actual truncation for small inputs: if the result is not positive, then we had an // overflow. 
- let inst = Inst::xmm_to_gpr( - trunc_op, - src.to_reg().to_reg(), - dst.to_writable_reg(), - *dst_size, - ); - inst.emit(sink, info, state); + let inst = Inst::xmm_to_gpr(trunc_op, src, Writable::from_reg(dst), *dst_size); + inst.emit(&[], sink, info, state); - let inst = Inst::cmp_rmi_r(*dst_size, RegMemImm::imm(0), dst.to_reg().to_reg()); - inst.emit(sink, info, state); + let inst = Inst::cmp_rmi_r(*dst_size, RegMemImm::imm(0), dst); + inst.emit(&[], sink, info, state); one_way_jmp(sink, CC::NL, done); // if dst >= 0, jump to done @@ -2463,40 +2438,31 @@ pub(crate) fn emit( let inst = Inst::alu_rmi_r( *dst_size, AluRmiROpcode::Xor, - RegMemImm::reg(dst.to_reg().to_reg()), - dst.to_writable_reg(), + RegMemImm::reg(dst), + Writable::from_reg(dst), ); - inst.emit(sink, info, state); + inst.emit(&[], sink, info, state); let inst = Inst::jmp_known(done); - inst.emit(sink, info, state); + inst.emit(&[], sink, info, state); } else { // Trap. let inst = Inst::trap(TrapCode::IntegerOverflow); - inst.emit(sink, info, state); + inst.emit(&[], sink, info, state); } // Now handle large inputs. sink.bind_label(handle_large); - let inst = Inst::xmm_rm_r( - sub_op, - RegMem::reg(tmp_xmm.to_reg().to_reg()), - src.to_writable_reg(), - ); - inst.emit(sink, info, state); + let inst = Inst::xmm_rm_r(sub_op, RegMem::reg(tmp_xmm), Writable::from_reg(src)); + inst.emit(&[], sink, info, state); - let inst = Inst::xmm_to_gpr( - trunc_op, - src.to_reg().to_reg(), - dst.to_writable_reg(), - *dst_size, - ); - inst.emit(sink, info, state); + let inst = Inst::xmm_to_gpr(trunc_op, src, Writable::from_reg(dst), *dst_size); + inst.emit(&[], sink, info, state); - let inst = Inst::cmp_rmi_r(*dst_size, RegMemImm::imm(0), dst.to_reg().to_reg()); - inst.emit(sink, info, state); + let inst = Inst::cmp_rmi_r(*dst_size, RegMemImm::imm(0), dst); + inst.emit(&[], sink, info, state); let next_is_large = sink.get_label(); one_way_jmp(sink, CC::NL, next_is_large); // if dst >= 0, jump to next_is_large @@ -2511,47 +2477,49 @@ pub(crate) fn emit( } else { u32::max_value() as u64 }, - dst.to_writable_reg(), + Writable::from_reg(dst), ); - inst.emit(sink, info, state); + inst.emit(&[], sink, info, state); let inst = Inst::jmp_known(done); - inst.emit(sink, info, state); + inst.emit(&[], sink, info, state); } else { let inst = Inst::trap(TrapCode::IntegerOverflow); - inst.emit(sink, info, state); + inst.emit(&[], sink, info, state); } sink.bind_label(next_is_large); if *dst_size == OperandSize::Size64 { - let inst = Inst::imm(OperandSize::Size64, 1 << 63, tmp_gpr.to_writable_reg()); - inst.emit(sink, info, state); + let inst = Inst::imm(OperandSize::Size64, 1 << 63, Writable::from_reg(tmp_gpr)); + inst.emit(&[], sink, info, state); let inst = Inst::alu_rmi_r( OperandSize::Size64, AluRmiROpcode::Add, - RegMemImm::reg(tmp_gpr.to_reg().to_reg()), - dst.to_writable_reg(), + RegMemImm::reg(tmp_gpr), + Writable::from_reg(dst), ); - inst.emit(sink, info, state); + inst.emit(&[], sink, info, state); } else { let inst = Inst::alu_rmi_r( OperandSize::Size32, AluRmiROpcode::Add, RegMemImm::imm(1 << 31), - dst.to_writable_reg(), + Writable::from_reg(dst), ); - inst.emit(sink, info, state); + inst.emit(&[], sink, info, state); } sink.bind_label(done); } Inst::LoadExtName { dst, name, offset } => { + let dst = allocs.next(dst.to_reg()); + if info.flags.is_pic() { // Generates: movq symbol@GOTPCREL(%rip), %dst - let enc_dst = int_reg_enc(dst.to_reg()); + let enc_dst = int_reg_enc(dst); sink.put1(0x48 | ((enc_dst >> 3) & 1) << 2); sink.put1(0x8B); 
sink.put1(0x05 | ((enc_dst & 7) << 3)); @@ -2575,7 +2543,7 @@ pub(crate) fn emit( } else { // The full address can be encoded in the register, with a relocation. // Generates: movabsq $name, %dst - let enc_dst = int_reg_enc(dst.to_reg()); + let enc_dst = int_reg_enc(dst); sink.put1(0x48 | ((enc_dst >> 3) & 1)); sink.put1(0xB8 | (enc_dst & 7)); emit_reloc(sink, state, Reloc::Abs8, name, *offset); @@ -2594,8 +2562,13 @@ pub(crate) fn emit( mem, dst_old, } => { - debug_assert_eq!(*expected, regs::rax()); - debug_assert_eq!(dst_old.to_reg(), regs::rax()); + let replacement = allocs.next(*replacement); + let expected = allocs.next(*expected); + let dst_old = allocs.next(dst_old.to_reg()); + let mem = mem.with_allocs(allocs); + + debug_assert_eq!(expected, regs::rax()); + debug_assert_eq!(dst_old, regs::rax()); // lock cmpxchg{b,w,l,q} %replacement, (mem) // Note that 0xF0 is the Lock prefix. @@ -2606,7 +2579,7 @@ pub(crate) fn emit( types::I64 => (LegacyPrefixes::_F0, 0x0FB1), _ => unreachable!(), }; - let rex = RexFlags::from((OperandSize::from_ty(*ty), *replacement)); + let rex = RexFlags::from((OperandSize::from_ty(*ty), replacement)); let amode = mem.finalize(state, sink); emit_std_reg_mem( sink, @@ -2615,7 +2588,7 @@ pub(crate) fn emit( prefix, opcodes, 2, - *replacement, + replacement, &amode, rex, ); @@ -2629,6 +2602,7 @@ pub(crate) fn emit( temp, dst_old, } => { + // FIXME: use real vregs for this seq. debug_assert_eq!(*address, regs::r9()); debug_assert_eq!(*operand, regs::r10()); debug_assert_eq!(temp.to_reg(), regs::r11()); @@ -2662,31 +2636,31 @@ pub(crate) fn emit( // mov{zbq,zwq,zlq,q} (%r9), %rax // No need to call `add_trap` here, since the `i1` emit will do that. let i1 = Inst::load(*ty, amode.clone(), rax_w, ExtKind::ZeroExtend); - i1.emit(sink, info, state); + i1.emit(&[], sink, info, state); // again: sink.bind_label(again_label); // movq %rax, %r11 let i2 = Inst::mov_r_r(OperandSize::Size64, rax, r11_w); - i2.emit(sink, info, state); + i2.emit(&[], sink, info, state); let r10_rmi = RegMemImm::reg(r10); match op { inst_common::AtomicRmwOp::Xchg => { // movq %r10, %r11 let i3 = Inst::mov_r_r(OperandSize::Size64, r10, r11_w); - i3.emit(sink, info, state); + i3.emit(&[], sink, info, state); } inst_common::AtomicRmwOp::Nand => { // andq %r10, %r11 let i3 = Inst::alu_rmi_r(OperandSize::Size64, AluRmiROpcode::And, r10_rmi, r11_w); - i3.emit(sink, info, state); + i3.emit(&[], sink, info, state); // notq %r11 let i4 = Inst::not(OperandSize::Size64, r11_w); - i4.emit(sink, info, state); + i4.emit(&[], sink, info, state); } inst_common::AtomicRmwOp::Umin | inst_common::AtomicRmwOp::Umax @@ -2694,7 +2668,7 @@ pub(crate) fn emit( | inst_common::AtomicRmwOp::Smax => { // cmp %r11, %r10 let i3 = Inst::cmp_rmi_r(OperandSize::from_ty(*ty), RegMemImm::reg(r11), r10); - i3.emit(sink, info, state); + i3.emit(&[], sink, info, state); // cmovcc %r10, %r11 let cc = match op { @@ -2705,7 +2679,7 @@ pub(crate) fn emit( _ => unreachable!(), }; let i4 = Inst::cmove(OperandSize::Size64, cc, RegMem::reg(r10), r11_w); - i4.emit(sink, info, state); + i4.emit(&[], sink, info, state); } _ => { // opq %r10, %r11 @@ -2723,7 +2697,7 @@ pub(crate) fn emit( | inst_common::AtomicRmwOp::Smax => unreachable!(), }; let i3 = Inst::alu_rmi_r(OperandSize::Size64, alu_op, r10_rmi, r11_w); - i3.emit(sink, info, state); + i3.emit(&[], sink, info, state); } } @@ -2736,7 +2710,7 @@ pub(crate) fn emit( mem: amode.into(), dst_old: Writable::from_reg(regs::rax()), }; - i4.emit(sink, info, state); + i4.emit(&[], sink, info, 
state); // jnz again one_way_jmp(sink, CC::NZ, again_label); @@ -2902,13 +2876,13 @@ pub(crate) fn emit( sink.put1(0x17); } - Inst::ValueLabelMarker { .. } => { - // Nothing; this is only used to compute debug info. - } - Inst::Unwind { ref inst } => { sink.add_unwind(inst.clone()); } + + Inst::DummyUse { .. } => { + // Nothing. + } } state.clear_post_insn(); diff --git a/cranelift/codegen/src/isa/x64/inst/emit_tests.rs b/cranelift/codegen/src/isa/x64/inst/emit_tests.rs index 154992f0fb..95f0fd2049 100644 --- a/cranelift/codegen/src/isa/x64/inst/emit_tests.rs +++ b/cranelift/codegen/src/isa/x64/inst/emit_tests.rs @@ -19,7 +19,7 @@ use alloc::vec::Vec; impl Inst { fn neg(size: OperandSize, src: Writable) -> Inst { - debug_assert_eq!(src.to_reg().get_class(), RegClass::I64); + debug_assert_eq!(src.to_reg().class(), RegClass::Int); Inst::Neg { size, src: Gpr::new(src.to_reg()).unwrap(), @@ -1085,7 +1085,7 @@ fn test_x64_emit() { w_rdx, ), "4C01FA", - "addq %r15, %rdx", + "addq %rdx, %r15, %rdx", )); insns.push(( Inst::alu_rmi_r( @@ -1095,7 +1095,7 @@ fn test_x64_emit() { w_r8, ), "4101C8", - "addl %ecx, %r8d", + "addl %r8d, %ecx, %r8d", )); insns.push(( Inst::alu_rmi_r( @@ -1105,7 +1105,7 @@ fn test_x64_emit() { w_rsi, ), "01CE", - "addl %ecx, %esi", + "addl %esi, %ecx, %esi", )); insns.push(( Inst::alu_rmi_r( @@ -1115,7 +1115,7 @@ fn test_x64_emit() { w_rdx, ), "48035763", - "addq 99(%rdi), %rdx", + "addq %rdx, 99(%rdi), %rdx", )); insns.push(( Inst::alu_rmi_r( @@ -1125,7 +1125,7 @@ fn test_x64_emit() { w_r8, ), "44034763", - "addl 99(%rdi), %r8d", + "addl %r8d, 99(%rdi), %r8d", )); insns.push(( Inst::alu_rmi_r( @@ -1135,7 +1135,7 @@ fn test_x64_emit() { w_rsi, ), "037763", - "addl 99(%rdi), %esi", + "addl %esi, 99(%rdi), %esi", )); insns.push(( Inst::alu_rmi_r( @@ -1145,7 +1145,7 @@ fn test_x64_emit() { w_rdx, ), "4883C281", - "addq $-127, %rdx", + "addq %rdx, $-127, %rdx", )); insns.push(( Inst::alu_rmi_r( @@ -1155,7 +1155,7 @@ fn test_x64_emit() { w_rdx, ), "4881C27FFFFFFF", - "addq $-129, %rdx", + "addq %rdx, $-129, %rdx", )); insns.push(( Inst::alu_rmi_r( @@ -1165,7 +1165,7 @@ fn test_x64_emit() { w_rdx, ), "4881C2EAF48F04", - "addq $76543210, %rdx", + "addq %rdx, $76543210, %rdx", )); insns.push(( Inst::alu_rmi_r( @@ -1175,7 +1175,7 @@ fn test_x64_emit() { w_r8, ), "4183C081", - "addl $-127, %r8d", + "addl %r8d, $-127, %r8d", )); insns.push(( Inst::alu_rmi_r( @@ -1185,7 +1185,7 @@ fn test_x64_emit() { w_r8, ), "4181C07FFFFFFF", - "addl $-129, %r8d", + "addl %r8d, $-129, %r8d", )); insns.push(( Inst::alu_rmi_r( @@ -1195,7 +1195,7 @@ fn test_x64_emit() { w_r8, ), "4181C0160B70FB", - "addl $-76543210, %r8d", + "addl %r8d, $-76543210, %r8d", )); insns.push(( Inst::alu_rmi_r( @@ -1205,7 +1205,7 @@ fn test_x64_emit() { w_rsi, ), "83C681", - "addl $-127, %esi", + "addl %esi, $-127, %esi", )); insns.push(( Inst::alu_rmi_r( @@ -1215,7 +1215,7 @@ fn test_x64_emit() { w_rsi, ), "81C67FFFFFFF", - "addl $-129, %esi", + "addl %esi, $-129, %esi", )); insns.push(( Inst::alu_rmi_r( @@ -1225,7 +1225,7 @@ fn test_x64_emit() { w_rsi, ), "81C6EAF48F04", - "addl $76543210, %esi", + "addl %esi, $76543210, %esi", )); // This is pretty feeble insns.push(( @@ -1236,7 +1236,7 @@ fn test_x64_emit() { w_rdx, ), "4C29FA", - "subq %r15, %rdx", + "subq %rdx, %r15, %rdx", )); insns.push(( Inst::alu_rmi_r( @@ -1246,7 +1246,7 @@ fn test_x64_emit() { w_rdx, ), "4C21FA", - "andq %r15, %rdx", + "andq %rdx, %r15, %rdx", )); insns.push(( Inst::alu_rmi_r( @@ -1256,7 +1256,7 @@ fn test_x64_emit() { w_rdx, ), "4C09FA", - "orq 
%r15, %rdx", + "orq %rdx, %r15, %rdx", )); insns.push(( Inst::alu_rmi_r( @@ -1266,7 +1266,7 @@ fn test_x64_emit() { w_rdx, ), "4420FA", - "andb %r15b, %dl", + "andb %dl, %r15b, %dl", )); insns.push(( Inst::alu_rmi_r( @@ -1276,7 +1276,7 @@ fn test_x64_emit() { w_rsi, ), "4020C6", - "andb %al, %sil", + "andb %sil, %al, %sil", )); insns.push(( Inst::alu_rmi_r( @@ -1286,7 +1286,7 @@ fn test_x64_emit() { w_rbx, ), "20C3", - "andb %al, %bl", + "andb %bl, %al, %bl", )); insns.push(( Inst::alu_rmi_r( @@ -1296,7 +1296,7 @@ fn test_x64_emit() { w_rbx, ), "2218", - "andb 0(%rax), %bl", + "andb %bl, 0(%rax), %bl", )); insns.push(( Inst::alu_rmi_r( @@ -1306,7 +1306,7 @@ fn test_x64_emit() { w_rdx, ), "4408FA", - "orb %r15b, %dl", + "orb %dl, %r15b, %dl", )); insns.push(( Inst::alu_rmi_r( @@ -1316,7 +1316,7 @@ fn test_x64_emit() { w_rsi, ), "4008C6", - "orb %al, %sil", + "orb %sil, %al, %sil", )); insns.push(( Inst::alu_rmi_r( @@ -1326,7 +1326,7 @@ fn test_x64_emit() { w_rbx, ), "08C3", - "orb %al, %bl", + "orb %bl, %al, %bl", )); insns.push(( Inst::alu_rmi_r( @@ -1336,7 +1336,7 @@ fn test_x64_emit() { w_rbx, ), "0A18", - "orb 0(%rax), %bl", + "orb %bl, 0(%rax), %bl", )); insns.push(( Inst::alu_rmi_r( @@ -1346,7 +1346,7 @@ fn test_x64_emit() { w_rdx, ), "4C31FA", - "xorq %r15, %rdx", + "xorq %rdx, %r15, %rdx", )); // Test all mul cases, though insns.push(( @@ -1357,7 +1357,7 @@ fn test_x64_emit() { w_rdx, ), "490FAFD7", - "imulq %r15, %rdx", + "imulq %rdx, %r15, %rdx", )); insns.push(( Inst::alu_rmi_r( @@ -1367,7 +1367,7 @@ fn test_x64_emit() { w_r8, ), "440FAFC1", - "imull %ecx, %r8d", + "imull %r8d, %ecx, %r8d", )); insns.push(( Inst::alu_rmi_r( @@ -1377,7 +1377,7 @@ fn test_x64_emit() { w_rsi, ), "0FAFF1", - "imull %ecx, %esi", + "imull %esi, %ecx, %esi", )); insns.push(( Inst::alu_rmi_r( @@ -1387,7 +1387,7 @@ fn test_x64_emit() { w_rdx, ), "480FAF5763", - "imulq 99(%rdi), %rdx", + "imulq %rdx, 99(%rdi), %rdx", )); insns.push(( Inst::alu_rmi_r( @@ -1397,7 +1397,7 @@ fn test_x64_emit() { w_r8, ), "440FAF4763", - "imull 99(%rdi), %r8d", + "imull %r8d, 99(%rdi), %r8d", )); insns.push(( Inst::alu_rmi_r( @@ -1407,7 +1407,7 @@ fn test_x64_emit() { w_rsi, ), "0FAF7763", - "imull 99(%rdi), %esi", + "imull %esi, 99(%rdi), %esi", )); insns.push(( Inst::alu_rmi_r( @@ -1417,7 +1417,7 @@ fn test_x64_emit() { w_rdx, ), "486BD281", - "imulq $-127, %rdx", + "imulq %rdx, $-127, %rdx", )); insns.push(( Inst::alu_rmi_r( @@ -1427,7 +1427,7 @@ fn test_x64_emit() { w_rdx, ), "4869D27FFFFFFF", - "imulq $-129, %rdx", + "imulq %rdx, $-129, %rdx", )); insns.push(( Inst::alu_rmi_r( @@ -1437,7 +1437,7 @@ fn test_x64_emit() { w_rdx, ), "4869D2EAF48F04", - "imulq $76543210, %rdx", + "imulq %rdx, $76543210, %rdx", )); insns.push(( Inst::alu_rmi_r( @@ -1447,7 +1447,7 @@ fn test_x64_emit() { w_r8, ), "456BC081", - "imull $-127, %r8d", + "imull %r8d, $-127, %r8d", )); insns.push(( Inst::alu_rmi_r( @@ -1457,7 +1457,7 @@ fn test_x64_emit() { w_r8, ), "4569C07FFFFFFF", - "imull $-129, %r8d", + "imull %r8d, $-129, %r8d", )); insns.push(( Inst::alu_rmi_r( @@ -1467,7 +1467,7 @@ fn test_x64_emit() { w_r8, ), "4569C0160B70FB", - "imull $-76543210, %r8d", + "imull %r8d, $-76543210, %r8d", )); insns.push(( Inst::alu_rmi_r( @@ -1477,7 +1477,7 @@ fn test_x64_emit() { w_rsi, ), "6BF681", - "imull $-127, %esi", + "imull %esi, $-127, %esi", )); insns.push(( Inst::alu_rmi_r( @@ -1487,7 +1487,7 @@ fn test_x64_emit() { w_rsi, ), "69F67FFFFFFF", - "imull $-129, %esi", + "imull %esi, $-129, %esi", )); insns.push(( Inst::alu_rmi_r( @@ -1497,7 +1497,7 @@ 
fn test_x64_emit() { w_rsi, ), "69F6EAF48F04", - "imull $76543210, %esi", + "imull %esi, $76543210, %esi", )); // ======================================================== @@ -1529,32 +1529,32 @@ fn test_x64_emit() { insns.push(( Inst::not(OperandSize::Size32, Writable::from_reg(regs::rsi())), "F7D6", - "notl %esi", + "notl %esi, %esi", )); insns.push(( Inst::not(OperandSize::Size64, Writable::from_reg(regs::r15())), "49F7D7", - "notq %r15", + "notq %r15, %r15", )); insns.push(( Inst::not(OperandSize::Size32, Writable::from_reg(regs::r14())), "41F7D6", - "notl %r14d", + "notl %r14d, %r14d", )); insns.push(( Inst::not(OperandSize::Size16, Writable::from_reg(regs::rdi())), "66F7D7", - "notw %di", + "notw %di, %di", )); insns.push(( Inst::not(OperandSize::Size8, Writable::from_reg(regs::rdi())), "40F6D7", - "notb %dil", + "notb %dil, %dil", )); insns.push(( Inst::not(OperandSize::Size8, Writable::from_reg(regs::rax())), "F6D0", - "notb %al", + "notb %al, %al", )); // ======================================================== @@ -1562,32 +1562,32 @@ fn test_x64_emit() { insns.push(( Inst::neg(OperandSize::Size32, Writable::from_reg(regs::rsi())), "F7DE", - "negl %esi", + "negl %esi, %esi", )); insns.push(( Inst::neg(OperandSize::Size64, Writable::from_reg(regs::r15())), "49F7DF", - "negq %r15", + "negq %r15, %r15", )); insns.push(( Inst::neg(OperandSize::Size32, Writable::from_reg(regs::r14())), "41F7DE", - "negl %r14d", + "negl %r14d, %r14d", )); insns.push(( Inst::neg(OperandSize::Size16, Writable::from_reg(regs::rdi())), "66F7DF", - "negw %di", + "negw %di, %di", )); insns.push(( Inst::neg(OperandSize::Size8, Writable::from_reg(regs::rdi())), "40F6DF", - "negb %dil", + "negb %dil, %dil", )); insns.push(( Inst::neg(OperandSize::Size8, Writable::from_reg(regs::rax())), "F6D8", - "negb %al", + "negb %al, %al", )); // ======================================================== @@ -1599,7 +1599,7 @@ fn test_x64_emit() { RegMem::reg(regs::rsi()), ), "F7FE", - "idiv %esi", + "idiv %eax, %edx, %esi, %eax, %edx", )); insns.push(( Inst::div( @@ -1608,7 +1608,7 @@ fn test_x64_emit() { RegMem::reg(regs::r15()), ), "49F7FF", - "idiv %r15", + "idiv %rax, %rdx, %r15, %rax, %rdx", )); insns.push(( Inst::div( @@ -1617,7 +1617,7 @@ fn test_x64_emit() { RegMem::reg(regs::r14()), ), "41F7F6", - "div %r14d", + "div %eax, %edx, %r14d, %eax, %edx", )); insns.push(( Inst::div( @@ -1626,17 +1626,17 @@ fn test_x64_emit() { RegMem::reg(regs::rdi()), ), "48F7F7", - "div %rdi", + "div %rax, %rdx, %rdi, %rax, %rdx", )); insns.push(( Inst::div(OperandSize::Size8, false, RegMem::reg(regs::rax())), "F6F0", - "div %al", + "div %al, %dl, %al, %al, %dl", )); insns.push(( Inst::div(OperandSize::Size8, false, RegMem::reg(regs::rsi())), "40F6F6", - "div %sil", + "div %al, %dl, %sil, %al, %dl", )); // ======================================================== @@ -1648,7 +1648,7 @@ fn test_x64_emit() { RegMem::reg(regs::rsi()), ), "F7EE", - "imul %esi", + "imul %eax, %esi, %eax, %edx", )); insns.push(( Inst::mul_hi( @@ -1657,7 +1657,7 @@ fn test_x64_emit() { RegMem::reg(regs::r15()), ), "49F7EF", - "imul %r15", + "imul %rax, %r15, %rax, %rdx", )); insns.push(( Inst::mul_hi( @@ -1666,7 +1666,7 @@ fn test_x64_emit() { RegMem::reg(regs::r14()), ), "41F7E6", - "mul %r14d", + "mul %eax, %r14d, %eax, %edx", )); insns.push(( Inst::mul_hi( @@ -1675,18 +1675,34 @@ fn test_x64_emit() { RegMem::reg(regs::rdi()), ), "48F7E7", - "mul %rdi", + "mul %rax, %rdi, %rax, %rdx", )); // ======================================================== // cbw - 
insns.push((Inst::sign_extend_data(OperandSize::Size8), "6698", "cbw")); + insns.push(( + Inst::sign_extend_data(OperandSize::Size8), + "6698", + "cbw %al, %dl", + )); // ======================================================== // cdq family: SignExtendRaxRdx - insns.push((Inst::sign_extend_data(OperandSize::Size16), "6699", "cwd")); - insns.push((Inst::sign_extend_data(OperandSize::Size32), "99", "cdq")); - insns.push((Inst::sign_extend_data(OperandSize::Size64), "4899", "cqo")); + insns.push(( + Inst::sign_extend_data(OperandSize::Size16), + "6699", + "cwd %ax, %dx", + )); + insns.push(( + Inst::sign_extend_data(OperandSize::Size32), + "99", + "cdq %eax, %edx", + )); + insns.push(( + Inst::sign_extend_data(OperandSize::Size64), + "4899", + "cqo %rax, %rdx", + )); // ======================================================== // Imm_R @@ -2096,7 +2112,10 @@ fn test_x64_emit() { "lea 179(%r10,%r9,1), %r8", )); insns.push(( - Inst::lea(Amode::rip_relative(MachLabel::from_block(0)), w_rdi), + Inst::lea( + Amode::rip_relative(MachLabel::from_block(BlockIndex::new(0))), + w_rdi, + ), "488D3D00000000", "lea label0(%rip), %rdi", )); @@ -2670,47 +2689,47 @@ fn test_x64_emit() { insns.push(( Inst::shift_r(OperandSize::Size32, ShiftKind::ShiftLeft, None, w_rdi), "D3E7", - "shll %cl, %edi", + "shll %cl, %edi, %edi", )); insns.push(( Inst::shift_r(OperandSize::Size32, ShiftKind::ShiftLeft, None, w_r12), "41D3E4", - "shll %cl, %r12d", + "shll %cl, %r12d, %r12d", )); insns.push(( Inst::shift_r(OperandSize::Size32, ShiftKind::ShiftLeft, Some(2), w_r8), "41C1E002", - "shll $2, %r8d", + "shll $2, %r8d, %r8d", )); insns.push(( Inst::shift_r(OperandSize::Size32, ShiftKind::ShiftLeft, Some(31), w_r13), "41C1E51F", - "shll $31, %r13d", + "shll $31, %r13d, %r13d", )); insns.push(( Inst::shift_r(OperandSize::Size64, ShiftKind::ShiftLeft, None, w_r13), "49D3E5", - "shlq %cl, %r13", + "shlq %cl, %r13, %r13", )); insns.push(( Inst::shift_r(OperandSize::Size64, ShiftKind::ShiftLeft, None, w_rdi), "48D3E7", - "shlq %cl, %rdi", + "shlq %cl, %rdi, %rdi", )); insns.push(( Inst::shift_r(OperandSize::Size64, ShiftKind::ShiftLeft, Some(2), w_r8), "49C1E002", - "shlq $2, %r8", + "shlq $2, %r8, %r8", )); insns.push(( Inst::shift_r(OperandSize::Size64, ShiftKind::ShiftLeft, Some(3), w_rbx), "48C1E303", - "shlq $3, %rbx", + "shlq $3, %rbx, %rbx", )); insns.push(( Inst::shift_r(OperandSize::Size64, ShiftKind::ShiftLeft, Some(63), w_r13), "49C1E53F", - "shlq $63, %r13", + "shlq $63, %r13, %r13", )); insns.push(( Inst::shift_r( @@ -2720,7 +2739,7 @@ fn test_x64_emit() { w_rdi, ), "D3EF", - "shrl %cl, %edi", + "shrl %cl, %edi, %edi", )); insns.push(( Inst::shift_r( @@ -2730,7 +2749,7 @@ fn test_x64_emit() { w_r8, ), "41C1E802", - "shrl $2, %r8d", + "shrl $2, %r8d, %r8d", )); insns.push(( Inst::shift_r( @@ -2740,7 +2759,7 @@ fn test_x64_emit() { w_r13, ), "41C1ED1F", - "shrl $31, %r13d", + "shrl $31, %r13d, %r13d", )); insns.push(( Inst::shift_r( @@ -2750,7 +2769,7 @@ fn test_x64_emit() { w_rdi, ), "48D3EF", - "shrq %cl, %rdi", + "shrq %cl, %rdi, %rdi", )); insns.push(( Inst::shift_r( @@ -2760,7 +2779,7 @@ fn test_x64_emit() { w_r8, ), "49C1E802", - "shrq $2, %r8", + "shrq $2, %r8, %r8", )); insns.push(( Inst::shift_r( @@ -2770,7 +2789,7 @@ fn test_x64_emit() { w_r13, ), "49C1ED3F", - "shrq $63, %r13", + "shrq $63, %r13, %r13", )); insns.push(( Inst::shift_r( @@ -2780,7 +2799,7 @@ fn test_x64_emit() { w_rdi, ), "D3FF", - "sarl %cl, %edi", + "sarl %cl, %edi, %edi", )); insns.push(( Inst::shift_r( @@ -2790,7 +2809,7 @@ fn 
test_x64_emit() { w_r8, ), "41C1F802", - "sarl $2, %r8d", + "sarl $2, %r8d, %r8d", )); insns.push(( Inst::shift_r( @@ -2800,7 +2819,7 @@ fn test_x64_emit() { w_r13, ), "41C1FD1F", - "sarl $31, %r13d", + "sarl $31, %r13d, %r13d", )); insns.push(( Inst::shift_r( @@ -2810,7 +2829,7 @@ fn test_x64_emit() { w_rdi, ), "48D3FF", - "sarq %cl, %rdi", + "sarq %cl, %rdi, %rdi", )); insns.push(( Inst::shift_r( @@ -2820,7 +2839,7 @@ fn test_x64_emit() { w_r8, ), "49C1F802", - "sarq $2, %r8", + "sarq $2, %r8, %r8", )); insns.push(( Inst::shift_r( @@ -2830,52 +2849,52 @@ fn test_x64_emit() { w_r13, ), "49C1FD3F", - "sarq $63, %r13", + "sarq $63, %r13, %r13", )); insns.push(( Inst::shift_r(OperandSize::Size64, ShiftKind::RotateLeft, None, w_r8), "49D3C0", - "rolq %cl, %r8", + "rolq %cl, %r8, %r8", )); insns.push(( Inst::shift_r(OperandSize::Size32, ShiftKind::RotateLeft, Some(3), w_r9), "41C1C103", - "roll $3, %r9d", + "roll $3, %r9d, %r9d", )); insns.push(( Inst::shift_r(OperandSize::Size32, ShiftKind::RotateRight, None, w_rsi), "D3CE", - "rorl %cl, %esi", + "rorl %cl, %esi, %esi", )); insns.push(( Inst::shift_r(OperandSize::Size64, ShiftKind::RotateRight, Some(5), w_r15), "49C1CF05", - "rorq $5, %r15", + "rorq $5, %r15, %r15", )); insns.push(( Inst::shift_r(OperandSize::Size8, ShiftKind::RotateRight, None, w_rsi), "40D2CE", - "rorb %cl, %sil", + "rorb %cl, %sil, %sil", )); insns.push(( Inst::shift_r(OperandSize::Size8, ShiftKind::RotateRight, None, w_rax), "D2C8", - "rorb %cl, %al", + "rorb %cl, %al, %al", )); insns.push(( Inst::shift_r(OperandSize::Size8, ShiftKind::RotateRight, Some(5), w_r15), "41C0CF05", - "rorb $5, %r15b", + "rorb $5, %r15b, %r15b", )); insns.push(( Inst::shift_r(OperandSize::Size16, ShiftKind::RotateRight, None, w_rsi), "66D3CE", - "rorw %cl, %si", + "rorw %cl, %si, %si", )); insns.push(( Inst::shift_r(OperandSize::Size16, ShiftKind::RotateRight, Some(5), w_r15), "6641C1CF05", - "rorw $5, %r15w", + "rorw $5, %r15w, %r15w", )); // ======================================================== @@ -3319,7 +3338,7 @@ fn test_x64_emit() { insns.push(( Inst::cmove(OperandSize::Size16, CC::O, RegMem::reg(rdi), w_rsi), "660F40F7", - "cmovow %di, %si", + "cmovow %di, %si, %si", )); insns.push(( Inst::cmove( @@ -3334,12 +3353,12 @@ fn test_x64_emit() { w_r15, ), "66440F417CB725", - "cmovnow 37(%rdi,%rsi,4), %r15w", + "cmovnow 37(%rdi,%rsi,4), %r15w, %r15w", )); insns.push(( Inst::cmove(OperandSize::Size32, CC::LE, RegMem::reg(rdi), w_rsi), "0F4EF7", - "cmovlel %edi, %esi", + "cmovlel %edi, %esi, %esi", )); insns.push(( Inst::cmove( @@ -3349,12 +3368,12 @@ fn test_x64_emit() { w_rsi, ), "410F4F37", - "cmovnlel 0(%r15), %esi", + "cmovnlel 0(%r15), %esi, %esi", )); insns.push(( Inst::cmove(OperandSize::Size64, CC::Z, RegMem::reg(rdi), w_r14), "4C0F44F7", - "cmovzq %rdi, %r14", + "cmovzq %rdi, %r14, %r14", )); insns.push(( Inst::cmove( @@ -3364,7 +3383,7 @@ fn test_x64_emit() { w_r14, ), "4C0F45770D", - "cmovnzq 13(%rdi), %r14", + "cmovnzq 13(%rdi), %r14, %r14", )); // ======================================================== @@ -3513,7 +3532,7 @@ fn test_x64_emit() { // ======================================================== // Ret - insns.push((Inst::ret(), "C3", "ret")); + insns.push((Inst::ret(vec![]), "C3", "ret")); // ======================================================== // JmpKnown skipped for now @@ -3589,12 +3608,12 @@ fn test_x64_emit() { insns.push(( Inst::xmm_rm_r(SseOpcode::Addss, RegMem::reg(xmm1), w_xmm0), "F30F58C1", - "addss %xmm1, %xmm0", + "addss %xmm0, %xmm1, %xmm0", )); 
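// Review note: with regalloc2 the tied read-modify-write operand is now
// printed explicitly, so two-operand SSE ops read `op src1, src2, dst`
// with src1 == dst. A minimal sketch of the round-trip, reusing the test
// helpers above (the binding names here are illustrative, not part of the PR):
let tied = Inst::xmm_rm_r(SseOpcode::Addss, RegMem::reg(xmm1), w_xmm0);
// With an empty allocation slice, registers print under their given names,
// yielding e.g. "addss %xmm0, %xmm1, %xmm0" (mnemonic padding elided):
// the first and last operand slots name the same tied register.
let _printed = tied.pretty_print_inst(&[], &mut Default::default());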
insns.push(( Inst::xmm_rm_r(SseOpcode::Addss, RegMem::reg(xmm11), w_xmm13), "F3450F58EB", - "addss %xmm11, %xmm13", + "addss %xmm13, %xmm11, %xmm13", )); insns.push(( Inst::xmm_rm_r( @@ -3608,23 +3627,23 @@ fn test_x64_emit() { w_xmm0, ), "F3410F5844927B", - "addss 123(%r10,%rdx,4), %xmm0", + "addss %xmm0, 123(%r10,%rdx,4), %xmm0", )); insns.push(( Inst::xmm_rm_r(SseOpcode::Addsd, RegMem::reg(xmm15), w_xmm4), "F2410F58E7", - "addsd %xmm15, %xmm4", + "addsd %xmm4, %xmm15, %xmm4", )); insns.push(( Inst::xmm_rm_r(SseOpcode::Subss, RegMem::reg(xmm0), w_xmm1), "F30F5CC8", - "subss %xmm0, %xmm1", + "subss %xmm1, %xmm0, %xmm1", )); insns.push(( Inst::xmm_rm_r(SseOpcode::Subss, RegMem::reg(xmm12), w_xmm1), "F3410F5CCC", - "subss %xmm12, %xmm1", + "subss %xmm1, %xmm12, %xmm1", )); insns.push(( Inst::xmm_rm_r( @@ -3638,75 +3657,75 @@ fn test_x64_emit() { w_xmm10, ), "F3450F5C94C241010000", - "subss 321(%r10,%rax,8), %xmm10", + "subss %xmm10, 321(%r10,%rax,8), %xmm10", )); insns.push(( Inst::xmm_rm_r(SseOpcode::Subsd, RegMem::reg(xmm5), w_xmm14), "F2440F5CF5", - "subsd %xmm5, %xmm14", + "subsd %xmm14, %xmm5, %xmm14", )); insns.push(( Inst::xmm_rm_r(SseOpcode::Mulss, RegMem::reg(xmm5), w_xmm4), "F30F59E5", - "mulss %xmm5, %xmm4", + "mulss %xmm4, %xmm5, %xmm4", )); insns.push(( Inst::xmm_rm_r(SseOpcode::Mulsd, RegMem::reg(xmm5), w_xmm4), "F20F59E5", - "mulsd %xmm5, %xmm4", + "mulsd %xmm4, %xmm5, %xmm4", )); insns.push(( Inst::xmm_rm_r(SseOpcode::Divss, RegMem::reg(xmm8), w_xmm7), "F3410F5EF8", - "divss %xmm8, %xmm7", + "divss %xmm7, %xmm8, %xmm7", )); insns.push(( Inst::xmm_rm_r(SseOpcode::Divsd, RegMem::reg(xmm5), w_xmm4), "F20F5EE5", - "divsd %xmm5, %xmm4", + "divsd %xmm4, %xmm5, %xmm4", )); insns.push(( Inst::xmm_rm_r(SseOpcode::Andps, RegMem::reg(xmm3), w_xmm12), "440F54E3", - "andps %xmm3, %xmm12", + "andps %xmm12, %xmm3, %xmm12", )); insns.push(( Inst::xmm_rm_r(SseOpcode::Andnps, RegMem::reg(xmm4), w_xmm11), "440F55DC", - "andnps %xmm4, %xmm11", + "andnps %xmm11, %xmm4, %xmm11", )); insns.push(( Inst::xmm_rm_r(SseOpcode::Orps, RegMem::reg(xmm1), w_xmm15), "440F56F9", - "orps %xmm1, %xmm15", + "orps %xmm15, %xmm1, %xmm15", )); insns.push(( Inst::xmm_rm_r(SseOpcode::Orps, RegMem::reg(xmm5), w_xmm4), "0F56E5", - "orps %xmm5, %xmm4", + "orps %xmm4, %xmm5, %xmm4", )); insns.push(( Inst::xmm_rm_r(SseOpcode::Blendvpd, RegMem::reg(xmm15), w_xmm4), "66410F3815E7", - "blendvpd %xmm15, %xmm4", + "blendvpd %xmm4, %xmm15, %xmm4", )); insns.push(( Inst::xmm_rm_r(SseOpcode::Blendvps, RegMem::reg(xmm2), w_xmm3), "660F3814DA", - "blendvps %xmm2, %xmm3", + "blendvps %xmm3, %xmm2, %xmm3", )); insns.push(( Inst::xmm_rm_r(SseOpcode::Pblendvb, RegMem::reg(xmm12), w_xmm13), "66450F3810EC", - "pblendvb %xmm12, %xmm13", + "pblendvb %xmm13, %xmm12, %xmm13", )); // ======================================================== @@ -3715,139 +3734,139 @@ fn test_x64_emit() { insns.push(( Inst::xmm_rm_r(SseOpcode::Paddb, RegMem::reg(xmm9), w_xmm5), "66410FFCE9", - "paddb %xmm9, %xmm5", + "paddb %xmm5, %xmm9, %xmm5", )); insns.push(( Inst::xmm_rm_r(SseOpcode::Paddw, RegMem::reg(xmm7), w_xmm6), "660FFDF7", - "paddw %xmm7, %xmm6", + "paddw %xmm6, %xmm7, %xmm6", )); insns.push(( Inst::xmm_rm_r(SseOpcode::Paddd, RegMem::reg(xmm12), w_xmm13), "66450FFEEC", - "paddd %xmm12, %xmm13", + "paddd %xmm13, %xmm12, %xmm13", )); insns.push(( Inst::xmm_rm_r(SseOpcode::Paddq, RegMem::reg(xmm1), w_xmm8), "66440FD4C1", - "paddq %xmm1, %xmm8", + "paddq %xmm8, %xmm1, %xmm8", )); insns.push(( Inst::xmm_rm_r(SseOpcode::Paddsb, RegMem::reg(xmm9), w_xmm5), 
"66410FECE9", - "paddsb %xmm9, %xmm5", + "paddsb %xmm5, %xmm9, %xmm5", )); insns.push(( Inst::xmm_rm_r(SseOpcode::Paddsw, RegMem::reg(xmm7), w_xmm6), "660FEDF7", - "paddsw %xmm7, %xmm6", + "paddsw %xmm6, %xmm7, %xmm6", )); insns.push(( Inst::xmm_rm_r(SseOpcode::Paddusb, RegMem::reg(xmm12), w_xmm13), "66450FDCEC", - "paddusb %xmm12, %xmm13", + "paddusb %xmm13, %xmm12, %xmm13", )); insns.push(( Inst::xmm_rm_r(SseOpcode::Paddusw, RegMem::reg(xmm1), w_xmm8), "66440FDDC1", - "paddusw %xmm1, %xmm8", + "paddusw %xmm8, %xmm1, %xmm8", )); insns.push(( Inst::xmm_rm_r(SseOpcode::Psubsb, RegMem::reg(xmm9), w_xmm5), "66410FE8E9", - "psubsb %xmm9, %xmm5", + "psubsb %xmm5, %xmm9, %xmm5", )); insns.push(( Inst::xmm_rm_r(SseOpcode::Psubsw, RegMem::reg(xmm7), w_xmm6), "660FE9F7", - "psubsw %xmm7, %xmm6", + "psubsw %xmm6, %xmm7, %xmm6", )); insns.push(( Inst::xmm_rm_r(SseOpcode::Psubusb, RegMem::reg(xmm12), w_xmm13), "66450FD8EC", - "psubusb %xmm12, %xmm13", + "psubusb %xmm13, %xmm12, %xmm13", )); insns.push(( Inst::xmm_rm_r(SseOpcode::Psubusw, RegMem::reg(xmm1), w_xmm8), "66440FD9C1", - "psubusw %xmm1, %xmm8", + "psubusw %xmm8, %xmm1, %xmm8", )); insns.push(( Inst::xmm_rm_r(SseOpcode::Pavgb, RegMem::reg(xmm12), w_xmm13), "66450FE0EC", - "pavgb %xmm12, %xmm13", + "pavgb %xmm13, %xmm12, %xmm13", )); insns.push(( Inst::xmm_rm_r(SseOpcode::Pavgw, RegMem::reg(xmm1), w_xmm8), "66440FE3C1", - "pavgw %xmm1, %xmm8", + "pavgw %xmm8, %xmm1, %xmm8", )); insns.push(( Inst::xmm_rm_r(SseOpcode::Psubb, RegMem::reg(xmm5), w_xmm9), "66440FF8CD", - "psubb %xmm5, %xmm9", + "psubb %xmm9, %xmm5, %xmm9", )); insns.push(( Inst::xmm_rm_r(SseOpcode::Psubw, RegMem::reg(xmm6), w_xmm7), "660FF9FE", - "psubw %xmm6, %xmm7", + "psubw %xmm7, %xmm6, %xmm7", )); insns.push(( Inst::xmm_rm_r(SseOpcode::Psubd, RegMem::reg(xmm13), w_xmm12), "66450FFAE5", - "psubd %xmm13, %xmm12", + "psubd %xmm12, %xmm13, %xmm12", )); insns.push(( Inst::xmm_rm_r(SseOpcode::Psubq, RegMem::reg(xmm8), w_xmm1), "66410FFBC8", - "psubq %xmm8, %xmm1", + "psubq %xmm1, %xmm8, %xmm1", )); insns.push(( Inst::xmm_rm_r(SseOpcode::Pmuldq, RegMem::reg(xmm4), w_xmm15), "66440F3828FC", - "pmuldq %xmm4, %xmm15", + "pmuldq %xmm15, %xmm4, %xmm15", )); insns.push(( Inst::xmm_rm_r(SseOpcode::Pmulhw, RegMem::reg(xmm9), w_xmm1), "66410FE5C9", - "pmulhw %xmm9, %xmm1", + "pmulhw %xmm1, %xmm9, %xmm1", )); insns.push(( Inst::xmm_rm_r(SseOpcode::Pmulhuw, RegMem::reg(xmm7), w_xmm9), "66440FE4CF", - "pmulhuw %xmm7, %xmm9", + "pmulhuw %xmm9, %xmm7, %xmm9", )); insns.push(( Inst::xmm_rm_r(SseOpcode::Pmulld, RegMem::reg(xmm15), w_xmm6), "66410F3840F7", - "pmulld %xmm15, %xmm6", + "pmulld %xmm6, %xmm15, %xmm6", )); insns.push(( Inst::xmm_rm_r(SseOpcode::Pmullw, RegMem::reg(xmm14), w_xmm1), "66410FD5CE", - "pmullw %xmm14, %xmm1", + "pmullw %xmm1, %xmm14, %xmm1", )); insns.push(( @@ -3871,151 +3890,151 @@ fn test_x64_emit() { insns.push(( Inst::xmm_rm_r(SseOpcode::Pmuludq, RegMem::reg(xmm8), w_xmm9), "66450FF4C8", - "pmuludq %xmm8, %xmm9", + "pmuludq %xmm9, %xmm8, %xmm9", )); insns.push(( Inst::xmm_rm_r(SseOpcode::Pmaddwd, RegMem::reg(xmm8), w_xmm1), "66410FF5C8", - "pmaddwd %xmm8, %xmm1", + "pmaddwd %xmm1, %xmm8, %xmm1", )); insns.push(( Inst::xmm_rm_r(SseOpcode::Pmaxsb, RegMem::reg(xmm15), w_xmm6), "66410F383CF7", - "pmaxsb %xmm15, %xmm6", + "pmaxsb %xmm6, %xmm15, %xmm6", )); insns.push(( Inst::xmm_rm_r(SseOpcode::Pmaxsw, RegMem::reg(xmm15), w_xmm6), "66410FEEF7", - "pmaxsw %xmm15, %xmm6", + "pmaxsw %xmm6, %xmm15, %xmm6", )); insns.push(( Inst::xmm_rm_r(SseOpcode::Pmaxsd, RegMem::reg(xmm15), 
w_xmm6), "66410F383DF7", - "pmaxsd %xmm15, %xmm6", + "pmaxsd %xmm6, %xmm15, %xmm6", )); insns.push(( Inst::xmm_rm_r(SseOpcode::Pmaxub, RegMem::reg(xmm14), w_xmm1), "66410FDECE", - "pmaxub %xmm14, %xmm1", + "pmaxub %xmm1, %xmm14, %xmm1", )); insns.push(( Inst::xmm_rm_r(SseOpcode::Pmaxuw, RegMem::reg(xmm14), w_xmm1), "66410F383ECE", - "pmaxuw %xmm14, %xmm1", + "pmaxuw %xmm1, %xmm14, %xmm1", )); insns.push(( Inst::xmm_rm_r(SseOpcode::Pmaxud, RegMem::reg(xmm14), w_xmm1), "66410F383FCE", - "pmaxud %xmm14, %xmm1", + "pmaxud %xmm1, %xmm14, %xmm1", )); insns.push(( Inst::xmm_rm_r(SseOpcode::Pminsb, RegMem::reg(xmm8), w_xmm9), "66450F3838C8", - "pminsb %xmm8, %xmm9", + "pminsb %xmm9, %xmm8, %xmm9", )); insns.push(( Inst::xmm_rm_r(SseOpcode::Pminsw, RegMem::reg(xmm8), w_xmm9), "66450FEAC8", - "pminsw %xmm8, %xmm9", + "pminsw %xmm9, %xmm8, %xmm9", )); insns.push(( Inst::xmm_rm_r(SseOpcode::Pminsd, RegMem::reg(xmm8), w_xmm9), "66450F3839C8", - "pminsd %xmm8, %xmm9", + "pminsd %xmm9, %xmm8, %xmm9", )); insns.push(( Inst::xmm_rm_r(SseOpcode::Pminub, RegMem::reg(xmm3), w_xmm2), "660FDAD3", - "pminub %xmm3, %xmm2", + "pminub %xmm2, %xmm3, %xmm2", )); insns.push(( Inst::xmm_rm_r(SseOpcode::Pminuw, RegMem::reg(xmm3), w_xmm2), "660F383AD3", - "pminuw %xmm3, %xmm2", + "pminuw %xmm2, %xmm3, %xmm2", )); insns.push(( Inst::xmm_rm_r(SseOpcode::Pminud, RegMem::reg(xmm3), w_xmm2), "660F383BD3", - "pminud %xmm3, %xmm2", + "pminud %xmm2, %xmm3, %xmm2", )); insns.push(( Inst::xmm_rm_r(SseOpcode::Pxor, RegMem::reg(xmm11), w_xmm2), "66410FEFD3", - "pxor %xmm11, %xmm2", + "pxor %xmm2, %xmm11, %xmm2", )); insns.push(( Inst::xmm_rm_r(SseOpcode::Pshufb, RegMem::reg(xmm11), w_xmm2), "66410F3800D3", - "pshufb %xmm11, %xmm2", + "pshufb %xmm2, %xmm11, %xmm2", )); insns.push(( Inst::xmm_rm_r(SseOpcode::Packssdw, RegMem::reg(xmm11), w_xmm12), "66450F6BE3", - "packssdw %xmm11, %xmm12", + "packssdw %xmm12, %xmm11, %xmm12", )); insns.push(( Inst::xmm_rm_r(SseOpcode::Packsswb, RegMem::reg(xmm11), w_xmm2), "66410F63D3", - "packsswb %xmm11, %xmm2", + "packsswb %xmm2, %xmm11, %xmm2", )); insns.push(( Inst::xmm_rm_r(SseOpcode::Packusdw, RegMem::reg(xmm13), w_xmm6), "66410F382BF5", - "packusdw %xmm13, %xmm6", + "packusdw %xmm6, %xmm13, %xmm6", )); insns.push(( Inst::xmm_rm_r(SseOpcode::Packuswb, RegMem::reg(xmm9), w_xmm4), "66410F67E1", - "packuswb %xmm9, %xmm4", + "packuswb %xmm4, %xmm9, %xmm4", )); insns.push(( Inst::xmm_rm_r(SseOpcode::Punpckhbw, RegMem::reg(xmm3), w_xmm2), "660F68D3", - "punpckhbw %xmm3, %xmm2", + "punpckhbw %xmm2, %xmm3, %xmm2", )); insns.push(( Inst::xmm_rm_r(SseOpcode::Punpckhwd, RegMem::reg(xmm13), w_xmm2), "66410F69D5", - "punpckhwd %xmm13, %xmm2", + "punpckhwd %xmm2, %xmm13, %xmm2", )); insns.push(( Inst::xmm_rm_r(SseOpcode::Punpcklbw, RegMem::reg(xmm1), w_xmm8), "66440F60C1", - "punpcklbw %xmm1, %xmm8", + "punpcklbw %xmm8, %xmm1, %xmm8", )); insns.push(( Inst::xmm_rm_r(SseOpcode::Punpcklwd, RegMem::reg(xmm11), w_xmm8), "66450F61C3", - "punpcklwd %xmm11, %xmm8", + "punpcklwd %xmm8, %xmm11, %xmm8", )); insns.push(( Inst::xmm_rm_r(SseOpcode::Unpcklps, RegMem::reg(xmm11), w_xmm2), "410F14D3", - "unpcklps %xmm11, %xmm2", + "unpcklps %xmm2, %xmm11, %xmm2", )); // ======================================================== @@ -4023,19 +4042,19 @@ fn test_x64_emit() { insns.push(( Inst::xmm_rm_r(SseOpcode::Cvtdq2ps, RegMem::reg(xmm1), w_xmm8), "440F5BC1", - "cvtdq2ps %xmm1, %xmm8", + "cvtdq2ps %xmm8, %xmm1, %xmm8", )); insns.push(( Inst::xmm_rm_r(SseOpcode::Cvttpd2dq, RegMem::reg(xmm15), w_xmm7), "66410FE6FF", - 
"cvttpd2dq %xmm15, %xmm7", + "cvttpd2dq %xmm7, %xmm15, %xmm7", )); insns.push(( Inst::xmm_rm_r(SseOpcode::Cvttps2dq, RegMem::reg(xmm9), w_xmm8), "F3450F5BC1", - "cvttps2dq %xmm9, %xmm8", + "cvttps2dq %xmm8, %xmm9, %xmm8", )); // XMM_Mov_R_M: float stores @@ -4347,17 +4366,17 @@ fn test_x64_emit() { insns.push(( Inst::xmm_rmi_reg(SseOpcode::Psraw, RegMemImm::reg(xmm10), w_xmm1), "66410FE1CA", - "psraw %xmm10, %xmm1", + "psraw %xmm1, %xmm10, %xmm1", )); insns.push(( Inst::xmm_rmi_reg(SseOpcode::Pslld, RegMemImm::imm(31), w_xmm1), "660F72F11F", - "pslld $31, %xmm1", + "pslld %xmm1, $31, %xmm1", )); insns.push(( Inst::xmm_rmi_reg(SseOpcode::Psrlq, RegMemImm::imm(1), w_xmm3), "660F73D301", - "psrlq $1, %xmm3", + "psrlq %xmm3, $1, %xmm3", )); // ======================================================== @@ -4371,7 +4390,7 @@ fn test_x64_emit() { OperandSize::Size32, ), "660FC2CD02", - "cmppd $2, %xmm5, %xmm1", + "cmppd $2, %xmm1, %xmm5, %xmm1", )); insns.push(( Inst::xmm_rm_r_imm( @@ -4382,7 +4401,7 @@ fn test_x64_emit() { OperandSize::Size32, ), "410FC2FF00", - "cmpps $0, %xmm15, %xmm7", + "cmpps $0, %xmm7, %xmm15, %xmm7", )); insns.push(( Inst::xmm_rm_r_imm( @@ -4393,7 +4412,7 @@ fn test_x64_emit() { OperandSize::Size32, ), "66440F3A0FC903", - "palignr $3, %xmm1, %xmm9", + "palignr $3, %xmm9, %xmm1, %xmm9", )); insns.push(( @@ -4405,7 +4424,7 @@ fn test_x64_emit() { OperandSize::Size32, ), "440FC6D188", - "shufps $136, %xmm1, %xmm10", + "shufps $136, %xmm10, %xmm1, %xmm10", )); insns.push(( @@ -4477,7 +4496,7 @@ fn test_x64_emit() { dst_old: w_rax, }, "F0410FB09C9241010000", - "lock cmpxchgb %bl, 321(%r10,%rdx,4)", + "lock cmpxchgb %bl, 321(%r10,%rdx,4), expected=%al, dst_old=%al", )); // Check redundant rex retention in 8-bit cases. insns.push(( @@ -4489,7 +4508,7 @@ fn test_x64_emit() { dst_old: w_rax, }, "F00FB094F1C7CFFFFF", - "lock cmpxchgb %dl, -12345(%rcx,%rsi,8)", + "lock cmpxchgb %dl, -12345(%rcx,%rsi,8), expected=%al, dst_old=%al", )); insns.push(( Inst::LockCmpxchg { @@ -4500,7 +4519,7 @@ fn test_x64_emit() { dst_old: w_rax, }, "F0400FB0B4F1C7CFFFFF", - "lock cmpxchgb %sil, -12345(%rcx,%rsi,8)", + "lock cmpxchgb %sil, -12345(%rcx,%rsi,8), expected=%al, dst_old=%al", )); insns.push(( Inst::LockCmpxchg { @@ -4511,7 +4530,7 @@ fn test_x64_emit() { dst_old: w_rax, }, "F0440FB094F1C7CFFFFF", - "lock cmpxchgb %r10b, -12345(%rcx,%rsi,8)", + "lock cmpxchgb %r10b, -12345(%rcx,%rsi,8), expected=%al, dst_old=%al", )); insns.push(( Inst::LockCmpxchg { @@ -4522,7 +4541,7 @@ fn test_x64_emit() { dst_old: w_rax, }, "F0440FB0BCF1C7CFFFFF", - "lock cmpxchgb %r15b, -12345(%rcx,%rsi,8)", + "lock cmpxchgb %r15b, -12345(%rcx,%rsi,8), expected=%al, dst_old=%al", )); // 16 bit cases insns.push(( @@ -4534,7 +4553,7 @@ fn test_x64_emit() { dst_old: w_rax, }, "66F00FB1B4F1C7CFFFFF", - "lock cmpxchgw %si, -12345(%rcx,%rsi,8)", + "lock cmpxchgw %si, -12345(%rcx,%rsi,8), expected=%ax, dst_old=%ax", )); insns.push(( Inst::LockCmpxchg { @@ -4545,7 +4564,7 @@ fn test_x64_emit() { dst_old: w_rax, }, "66F0440FB194F1C7CFFFFF", - "lock cmpxchgw %r10w, -12345(%rcx,%rsi,8)", + "lock cmpxchgw %r10w, -12345(%rcx,%rsi,8), expected=%ax, dst_old=%ax", )); // 32 bit cases insns.push(( @@ -4557,7 +4576,7 @@ fn test_x64_emit() { dst_old: w_rax, }, "F00FB1B4F1C7CFFFFF", - "lock cmpxchgl %esi, -12345(%rcx,%rsi,8)", + "lock cmpxchgl %esi, -12345(%rcx,%rsi,8), expected=%eax, dst_old=%eax", )); insns.push(( Inst::LockCmpxchg { @@ -4568,7 +4587,7 @@ fn test_x64_emit() { dst_old: w_rax, }, "F0440FB194F1C7CFFFFF", - "lock cmpxchgl %r10d, 
-12345(%rcx,%rsi,8)", + "lock cmpxchgl %r10d, -12345(%rcx,%rsi,8), expected=%eax, dst_old=%eax", )); // 64 bit cases insns.push(( @@ -4580,7 +4599,7 @@ fn test_x64_emit() { dst_old: w_rax, }, "F0480FB1B4F1C7CFFFFF", - "lock cmpxchgq %rsi, -12345(%rcx,%rsi,8)", + "lock cmpxchgq %rsi, -12345(%rcx,%rsi,8), expected=%rax, dst_old=%rax", )); insns.push(( Inst::LockCmpxchg { @@ -4591,7 +4610,7 @@ fn test_x64_emit() { dst_old: w_rax, }, "F04C0FB194F1C7CFFFFF", - "lock cmpxchgq %r10, -12345(%rcx,%rsi,8)", + "lock cmpxchgq %r10, -12345(%rcx,%rsi,8), expected=%rax, dst_old=%rax", )); // AtomicRmwSeq @@ -4726,15 +4745,14 @@ fn test_x64_emit() { isa_flag_builder.enable("has_avx512vl").unwrap(); let isa_flags = x64::settings::Flags::new(&flags, isa_flag_builder); - let rru = regs::create_reg_universe_systemv(&flags); let emit_info = EmitInfo::new(flags, isa_flags); for (insn, expected_encoding, expected_printing) in insns { // Check the printed text is as expected. - let actual_printing = insn.show_rru(Some(&rru)); + let actual_printing = insn.pretty_print_inst(&[], &mut Default::default()); assert_eq!(expected_printing, actual_printing); let mut buffer = MachBuffer::new(); - insn.emit(&mut buffer, &emit_info, &mut Default::default()); + insn.emit(&[], &mut buffer, &emit_info, &mut Default::default()); // Allow one label just after the instruction (so the offset is 0). let label = buffer.get_label(); diff --git a/cranelift/codegen/src/isa/x64/inst/mod.rs b/cranelift/codegen/src/isa/x64/inst/mod.rs index 9bf0fe2f04..1dd8d18ad6 100644 --- a/cranelift/codegen/src/isa/x64/inst/mod.rs +++ b/cranelift/codegen/src/isa/x64/inst/mod.rs @@ -1,17 +1,15 @@ //! This module defines x86_64-specific machine instruction types. use crate::binemit::{Addend, CodeOffset, Reloc, StackMap}; -use crate::ir::{types, ExternalName, Opcode, SourceLoc, TrapCode, Type, ValueLabel}; +use crate::ir::{types, ExternalName, Opcode, SourceLoc, TrapCode, Type}; use crate::isa::x64::abi::X64ABIMachineSpec; +use crate::isa::x64::inst::regs::pretty_print_reg; use crate::isa::x64::settings as x64_settings; use crate::isa::CallConv; use crate::machinst::*; use crate::{settings, CodegenError, CodegenResult}; use alloc::vec::Vec; -use regalloc::{ - PrettyPrint, PrettyPrintSized, RealRegUniverse, Reg, RegClass, RegUsageCollector, SpillSlot, - VirtualReg, Writable, -}; +use regalloc2::{Allocation, VReg}; use smallvec::{smallvec, SmallVec}; use std::fmt; use std::string::{String, ToString}; @@ -24,7 +22,6 @@ pub mod regs; pub mod unwind; use args::*; -use regs::show_ireg_sized; //============================================================================= // Instructions (top level): definition @@ -80,7 +77,7 @@ impl Inst { | Inst::Nop { .. } | Inst::Pop64 { .. } | Inst::Push64 { .. } - | Inst::Ret + | Inst::Ret { .. } | Inst::Setcc { .. } | Inst::ShiftR { .. } | Inst::SignExtendData { .. } @@ -94,8 +91,8 @@ impl Inst { | Inst::XmmUninitializedValue { .. } | Inst::ElfTlsGetAddr { .. } | Inst::MachOTlsGetAddr { .. } - | Inst::ValueLabelMarker { .. } - | Inst::Unwind { .. } => smallvec![], + | Inst::Unwind { .. } + | Inst::DummyUse { .. } => smallvec![], Inst::UnaryRmR { op, .. 
} => op.available_from(), @@ -128,8 +125,8 @@ impl Inst { dst: Writable, ) -> Self { debug_assert!(size.is_one_of(&[OperandSize::Size32, OperandSize::Size64])); - src.assert_regclass_is(RegClass::I64); - debug_assert!(dst.to_reg().get_class() == RegClass::I64); + src.assert_regclass_is(RegClass::Int); + debug_assert!(dst.to_reg().class() == RegClass::Int); Self::AluRmiR { size, op, @@ -146,8 +143,8 @@ impl Inst { src: RegMem, dst: Writable, ) -> Self { - src.assert_regclass_is(RegClass::I64); - debug_assert!(dst.to_reg().get_class() == RegClass::I64); + src.assert_regclass_is(RegClass::Int); + debug_assert!(dst.to_reg().class() == RegClass::Int); debug_assert!(size.is_one_of(&[ OperandSize::Size16, OperandSize::Size32, @@ -162,7 +159,7 @@ impl Inst { } pub(crate) fn not(size: OperandSize, src: Writable) -> Inst { - debug_assert_eq!(src.to_reg().get_class(), RegClass::I64); + debug_assert_eq!(src.to_reg().class(), RegClass::Int); Inst::Not { size, src: Gpr::new(src.to_reg()).unwrap(), @@ -171,12 +168,13 @@ impl Inst { } pub(crate) fn div(size: OperandSize, signed: bool, divisor: RegMem) -> Inst { - divisor.assert_regclass_is(RegClass::I64); + divisor.assert_regclass_is(RegClass::Int); Inst::Div { size, signed, divisor: GprMem::new(divisor).unwrap(), - dividend: Gpr::new(regs::rax()).unwrap(), + dividend_lo: Gpr::new(regs::rax()).unwrap(), + dividend_hi: Gpr::new(regs::rdx()).unwrap(), dst_quotient: WritableGpr::from_reg(Gpr::new(regs::rax()).unwrap()), dst_remainder: Writable::from_reg(Gpr::new(regs::rdx()).unwrap()), } @@ -188,7 +186,7 @@ impl Inst { OperandSize::Size32, OperandSize::Size64 ])); - rhs.assert_regclass_is(RegClass::I64); + rhs.assert_regclass_is(RegClass::Int); Inst::MulHi { size, signed, @@ -205,15 +203,16 @@ impl Inst { divisor: Writable, tmp: Option>, ) -> Inst { - debug_assert!(divisor.to_reg().get_class() == RegClass::I64); + debug_assert!(divisor.to_reg().class() == RegClass::Int); debug_assert!(tmp - .map(|tmp| tmp.to_reg().get_class() == RegClass::I64) + .map(|tmp| tmp.to_reg().class() == RegClass::Int) .unwrap_or(true)); Inst::CheckedDivOrRemSeq { kind, size, divisor: WritableGpr::from_writable_reg(divisor).unwrap(), - dividend: Gpr::new(regs::rax()).unwrap(), + dividend_lo: Gpr::new(regs::rax()).unwrap(), + dividend_hi: Gpr::new(regs::rdx()).unwrap(), dst_quotient: Writable::from_reg(Gpr::new(regs::rax()).unwrap()), dst_remainder: Writable::from_reg(Gpr::new(regs::rdx()).unwrap()), tmp: tmp.map(|tmp| WritableGpr::from_writable_reg(tmp).unwrap()), @@ -230,7 +229,7 @@ impl Inst { pub(crate) fn imm(dst_size: OperandSize, simm64: u64, dst: Writable) -> Inst { debug_assert!(dst_size.is_one_of(&[OperandSize::Size32, OperandSize::Size64])); - debug_assert!(dst.to_reg().get_class() == RegClass::I64); + debug_assert!(dst.to_reg().class() == RegClass::Int); // Try to generate a 32-bit immediate when the upper high bits are zeroed (which matches // the semantics of movl). 
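// (Writing a 32-bit register on x86-64 implicitly zero-extends into the full
// 64-bit register, so e.g. simm64 = 0x8000_0000 can be materialized with the
// short `movl` encoding rather than the 10-byte `movabsq`; only values with
// any of the upper 32 bits set need the 64-bit form.)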
let dst_size = match dst_size { @@ -246,8 +245,8 @@ impl Inst { pub(crate) fn mov_r_r(size: OperandSize, src: Reg, dst: Writable) -> Inst { debug_assert!(size.is_one_of(&[OperandSize::Size32, OperandSize::Size64])); - debug_assert!(src.get_class() == RegClass::I64); - debug_assert!(dst.to_reg().get_class() == RegClass::I64); + debug_assert!(src.class() == RegClass::Int); + debug_assert!(dst.to_reg().class() == RegClass::Int); let src = Gpr::new(src).unwrap(); let dst = WritableGpr::from_writable_reg(dst).unwrap(); Inst::MovRR { size, src, dst } @@ -255,8 +254,8 @@ impl Inst { // TODO Can be replaced by `Inst::move` (high-level) and `Inst::unary_rm_r` (low-level) pub(crate) fn xmm_mov(op: SseOpcode, src: RegMem, dst: Writable) -> Inst { - src.assert_regclass_is(RegClass::V128); - debug_assert!(dst.to_reg().get_class() == RegClass::V128); + src.assert_regclass_is(RegClass::Float); + debug_assert!(dst.to_reg().class() == RegClass::Float); Inst::XmmUnaryRmR { op, src: XmmMem::new(src).unwrap(), @@ -265,15 +264,15 @@ impl Inst { } pub(crate) fn xmm_load_const(src: VCodeConstant, dst: Writable, ty: Type) -> Inst { - debug_assert!(dst.to_reg().get_class() == RegClass::V128); + debug_assert!(dst.to_reg().class() == RegClass::Float); debug_assert!(ty.is_vector() && ty.bits() == 128); Inst::XmmLoadConst { src, dst, ty } } /// Convenient helper for unary float operations. pub(crate) fn xmm_unary_rm_r(op: SseOpcode, src: RegMem, dst: Writable) -> Inst { - src.assert_regclass_is(RegClass::V128); - debug_assert!(dst.to_reg().get_class() == RegClass::V128); + src.assert_regclass_is(RegClass::Float); + debug_assert!(dst.to_reg().class() == RegClass::Float); Inst::XmmUnaryRmR { op, src: XmmMem::new(src).unwrap(), @@ -282,8 +281,8 @@ impl Inst { } pub(crate) fn xmm_unary_rm_r_evex(op: Avx512Opcode, src: RegMem, dst: Writable) -> Inst { - src.assert_regclass_is(RegClass::V128); - debug_assert!(dst.to_reg().get_class() == RegClass::V128); + src.assert_regclass_is(RegClass::Float); + debug_assert!(dst.to_reg().class() == RegClass::Float); Inst::XmmUnaryRmREvex { op, src: XmmMem::new(src).unwrap(), @@ -292,8 +291,8 @@ impl Inst { } pub(crate) fn xmm_rm_r(op: SseOpcode, src: RegMem, dst: Writable) -> Self { - src.assert_regclass_is(RegClass::V128); - debug_assert!(dst.to_reg().get_class() == RegClass::V128); + src.assert_regclass_is(RegClass::Float); + debug_assert!(dst.to_reg().class() == RegClass::Float); Inst::XmmRmR { op, src1: Xmm::new(dst.to_reg()).unwrap(), @@ -308,9 +307,9 @@ impl Inst { src2: Reg, dst: Writable, ) -> Self { - src1.assert_regclass_is(RegClass::V128); - debug_assert!(src2.get_class() == RegClass::V128); - debug_assert!(dst.to_reg().get_class() == RegClass::V128); + src1.assert_regclass_is(RegClass::Float); + debug_assert!(src2.class() == RegClass::Float); + debug_assert!(dst.to_reg().class() == RegClass::Float); Inst::XmmRmREvex { op, src1: XmmMem::new(src1).unwrap(), @@ -320,14 +319,14 @@ impl Inst { } pub(crate) fn xmm_uninit_value(dst: Writable) -> Self { - debug_assert!(dst.to_reg().get_class() == RegClass::V128); + debug_assert!(dst.to_reg().class() == RegClass::Float); Inst::XmmUninitializedValue { dst: WritableXmm::from_writable_reg(dst).unwrap(), } } pub(crate) fn xmm_mov_r_m(op: SseOpcode, src: Reg, dst: impl Into) -> Inst { - debug_assert!(src.get_class() == RegClass::V128); + debug_assert!(src.class() == RegClass::Float); Inst::XmmMovRM { op, src, @@ -341,8 +340,8 @@ impl Inst { dst: Writable, dst_size: OperandSize, ) -> Inst { - debug_assert!(src.get_class() == 
RegClass::V128); - debug_assert!(dst.to_reg().get_class() == RegClass::I64); + debug_assert!(src.class() == RegClass::Float); + debug_assert!(dst.to_reg().class() == RegClass::Int); debug_assert!(dst_size.is_one_of(&[OperandSize::Size32, OperandSize::Size64])); Inst::XmmToGpr { op, @@ -358,9 +357,9 @@ impl Inst { src_size: OperandSize, dst: Writable, ) -> Inst { - src.assert_regclass_is(RegClass::I64); + src.assert_regclass_is(RegClass::Int); debug_assert!(src_size.is_one_of(&[OperandSize::Size32, OperandSize::Size64])); - debug_assert!(dst.to_reg().get_class() == RegClass::V128); + debug_assert!(dst.to_reg().class() == RegClass::Float); Inst::GprToXmm { op, src: GprMem::new(src).unwrap(), @@ -370,8 +369,8 @@ impl Inst { } pub(crate) fn xmm_cmp_rm_r(op: SseOpcode, src: RegMem, dst: Reg) -> Inst { - src.assert_regclass_is(RegClass::V128); - debug_assert!(dst.get_class() == RegClass::V128); + src.assert_regclass_is(RegClass::Float); + debug_assert!(dst.class() == RegClass::Float); let src = XmmMem::new(src).unwrap(); let dst = Xmm::new(dst).unwrap(); Inst::XmmCmpRmR { op, src, dst } @@ -385,10 +384,10 @@ impl Inst { dst: Writable, ) -> Inst { debug_assert!(dst_size.is_one_of(&[OperandSize::Size32, OperandSize::Size64])); - debug_assert!(src.to_reg().get_class() == RegClass::I64); - debug_assert!(tmp_gpr1.to_reg().get_class() == RegClass::I64); - debug_assert!(tmp_gpr2.to_reg().get_class() == RegClass::I64); - debug_assert!(dst.to_reg().get_class() == RegClass::V128); + debug_assert!(src.to_reg().class() == RegClass::Int); + debug_assert!(tmp_gpr1.to_reg().class() == RegClass::Int); + debug_assert!(tmp_gpr2.to_reg().class() == RegClass::Int); + debug_assert!(dst.to_reg().class() == RegClass::Float); Inst::CvtUint64ToFloatSeq { src: WritableGpr::from_writable_reg(src).unwrap(), dst: WritableXmm::from_writable_reg(dst).unwrap(), @@ -409,10 +408,10 @@ impl Inst { ) -> Inst { debug_assert!(src_size.is_one_of(&[OperandSize::Size32, OperandSize::Size64])); debug_assert!(dst_size.is_one_of(&[OperandSize::Size32, OperandSize::Size64])); - debug_assert!(src.to_reg().get_class() == RegClass::V128); - debug_assert!(tmp_xmm.to_reg().get_class() == RegClass::V128); - debug_assert!(tmp_gpr.to_reg().get_class() == RegClass::I64); - debug_assert!(dst.to_reg().get_class() == RegClass::I64); + debug_assert!(src.to_reg().class() == RegClass::Float); + debug_assert!(tmp_xmm.to_reg().class() == RegClass::Float); + debug_assert!(tmp_gpr.to_reg().class() == RegClass::Int); + debug_assert!(dst.to_reg().class() == RegClass::Int); Inst::CvtFloatToSintSeq { src_size, dst_size, @@ -435,10 +434,10 @@ impl Inst { ) -> Inst { debug_assert!(src_size.is_one_of(&[OperandSize::Size32, OperandSize::Size64])); debug_assert!(dst_size.is_one_of(&[OperandSize::Size32, OperandSize::Size64])); - debug_assert!(src.to_reg().get_class() == RegClass::V128); - debug_assert!(tmp_xmm.to_reg().get_class() == RegClass::V128); - debug_assert!(tmp_gpr.to_reg().get_class() == RegClass::I64); - debug_assert!(dst.to_reg().get_class() == RegClass::I64); + debug_assert!(src.to_reg().class() == RegClass::Float); + debug_assert!(tmp_xmm.to_reg().class() == RegClass::Float); + debug_assert!(tmp_gpr.to_reg().class() == RegClass::Int); + debug_assert!(dst.to_reg().class() == RegClass::Int); Inst::CvtFloatToUintSeq { src_size, dst_size, @@ -459,9 +458,9 @@ impl Inst { dst: Writable, ) -> Inst { debug_assert!(size.is_one_of(&[OperandSize::Size32, OperandSize::Size64])); - debug_assert_eq!(lhs.get_class(), RegClass::V128); - 
debug_assert_eq!(rhs.get_class(), RegClass::V128); - debug_assert_eq!(dst.to_reg().get_class(), RegClass::V128); + debug_assert_eq!(lhs.class(), RegClass::Float); + debug_assert_eq!(rhs.class(), RegClass::Float); + debug_assert_eq!(dst.to_reg().class(), RegClass::Float); Inst::XmmMinMaxSeq { size, is_min, @@ -490,16 +489,16 @@ impl Inst { } pub(crate) fn movzx_rm_r(ext_mode: ExtMode, src: RegMem, dst: Writable) -> Inst { - src.assert_regclass_is(RegClass::I64); - debug_assert!(dst.to_reg().get_class() == RegClass::I64); + src.assert_regclass_is(RegClass::Int); + debug_assert!(dst.to_reg().class() == RegClass::Int); let src = GprMem::new(src).unwrap(); let dst = WritableGpr::from_writable_reg(dst).unwrap(); Inst::MovzxRmR { ext_mode, src, dst } } pub(crate) fn xmm_rmi_reg(opcode: SseOpcode, src: RegMemImm, dst: Writable) -> Inst { - src.assert_regclass_is(RegClass::V128); - debug_assert!(dst.to_reg().get_class() == RegClass::V128); + src.assert_regclass_is(RegClass::Float); + debug_assert!(dst.to_reg().class() == RegClass::Float); Inst::XmmRmiReg { opcode, src1: Xmm::new(dst.to_reg()).unwrap(), @@ -509,15 +508,15 @@ impl Inst { } pub(crate) fn movsx_rm_r(ext_mode: ExtMode, src: RegMem, dst: Writable) -> Inst { - src.assert_regclass_is(RegClass::I64); - debug_assert!(dst.to_reg().get_class() == RegClass::I64); + src.assert_regclass_is(RegClass::Int); + debug_assert!(dst.to_reg().class() == RegClass::Int); let src = GprMem::new(src).unwrap(); let dst = WritableGpr::from_writable_reg(dst).unwrap(); Inst::MovsxRmR { ext_mode, src, dst } } pub(crate) fn mov64_m_r(src: impl Into, dst: Writable) -> Inst { - debug_assert!(dst.to_reg().get_class() == RegClass::I64); + debug_assert!(dst.to_reg().class() == RegClass::Int); Inst::Mov64MR { src: src.into(), dst: WritableGpr::from_writable_reg(dst).unwrap(), @@ -526,7 +525,7 @@ impl Inst { /// A convenience function to be able to use a RegMem as the source of a move. pub(crate) fn mov64_rm_r(src: RegMem, dst: Writable) -> Inst { - src.assert_regclass_is(RegClass::I64); + src.assert_regclass_is(RegClass::Int); match src { RegMem::Reg { reg } => Self::mov_r_r(OperandSize::Size64, reg, dst), RegMem::Mem { addr } => Self::mov64_m_r(addr, dst), @@ -534,7 +533,7 @@ impl Inst { } pub(crate) fn mov_r_m(size: OperandSize, src: Reg, dst: impl Into) -> Inst { - debug_assert!(src.get_class() == RegClass::I64); + debug_assert!(src.class() == RegClass::Int); Inst::MovRM { size, src: Gpr::new(src).unwrap(), @@ -543,7 +542,7 @@ impl Inst { } pub(crate) fn lea(addr: impl Into, dst: Writable) -> Inst { - debug_assert!(dst.to_reg().get_class() == RegClass::I64); + debug_assert!(dst.to_reg().class() == RegClass::Int); Inst::LoadEffectiveAddress { addr: addr.into(), dst: WritableGpr::from_writable_reg(dst).unwrap(), @@ -561,7 +560,7 @@ impl Inst { } else { true }); - debug_assert!(dst.to_reg().get_class() == RegClass::I64); + debug_assert!(dst.to_reg().class() == RegClass::Int); Inst::ShiftR { size, kind, @@ -578,8 +577,8 @@ impl Inst { /// Does a comparison of dst - src for operands of size `size`, as stated by the machine /// instruction semantics. Be careful with the order of parameters! 
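/// In AT&T terms this is `cmp src, dst`; for example, assuming the usual
/// printing conventions, `cmp_rmi_r(OperandSize::Size64, RegMemImm::imm(0),
/// regs::rax())` renders as `cmpq $0, %rax` and sets flags from `%rax - 0`.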
pub(crate) fn cmp_rmi_r(size: OperandSize, src: RegMemImm, dst: Reg) -> Inst { - src.assert_regclass_is(RegClass::I64); - debug_assert_eq!(dst.get_class(), RegClass::I64); + src.assert_regclass_is(RegClass::Int); + debug_assert_eq!(dst.class(), RegClass::Int); Inst::CmpRmiR { size, src: GprMemImm::new(src).unwrap(), @@ -590,8 +589,8 @@ impl Inst { /// Does a comparison of dst & src for operands of size `size`. pub(crate) fn test_rmi_r(size: OperandSize, src: RegMemImm, dst: Reg) -> Inst { - src.assert_regclass_is(RegClass::I64); - debug_assert_eq!(dst.get_class(), RegClass::I64); + src.assert_regclass_is(RegClass::Int); + debug_assert_eq!(dst.class(), RegClass::Int); Inst::CmpRmiR { size, src: GprMemImm::new(src).unwrap(), @@ -607,7 +606,7 @@ impl Inst { } pub(crate) fn setcc(cc: CC, dst: Writable) -> Inst { - debug_assert!(dst.to_reg().get_class() == RegClass::I64); + debug_assert!(dst.to_reg().class() == RegClass::Int); let dst = WritableGpr::from_writable_reg(dst).unwrap(); Inst::Setcc { cc, dst } } @@ -618,7 +617,7 @@ impl Inst { OperandSize::Size32, OperandSize::Size64 ])); - debug_assert!(dst.to_reg().get_class() == RegClass::I64); + debug_assert!(dst.to_reg().class() == RegClass::Int); Inst::Cmove { size, cc, @@ -630,8 +629,8 @@ impl Inst { pub(crate) fn xmm_cmove(size: OperandSize, cc: CC, src: RegMem, dst: Writable) -> Inst { debug_assert!(size.is_one_of(&[OperandSize::Size32, OperandSize::Size64])); - src.assert_regclass_is(RegClass::V128); - debug_assert!(dst.to_reg().get_class() == RegClass::V128); + src.assert_regclass_is(RegClass::Float); + debug_assert!(dst.to_reg().class() == RegClass::Float); let src = XmmMem::new(src).unwrap(); let dst = WritableXmm::from_writable_reg(dst).unwrap(); Inst::XmmCmove { @@ -644,13 +643,13 @@ impl Inst { } pub(crate) fn push64(src: RegMemImm) -> Inst { - src.assert_regclass_is(RegClass::I64); + src.assert_regclass_is(RegClass::Int); let src = GprMemImm::new(src).unwrap(); Inst::Push64 { src } } pub(crate) fn pop64(dst: Writable) -> Inst { - debug_assert!(dst.to_reg().get_class() == RegClass::I64); + debug_assert!(dst.to_reg().class() == RegClass::Int); let dst = WritableGpr::from_writable_reg(dst).unwrap(); Inst::Pop64 { dst } } @@ -675,7 +674,7 @@ impl Inst { defs: Vec>, opcode: Opcode, ) -> Inst { - dest.assert_regclass_is(RegClass::I64); + dest.assert_regclass_is(RegClass::Int); Inst::CallUnknown { dest, uses, @@ -684,8 +683,8 @@ impl Inst { } } - pub(crate) fn ret() -> Inst { - Inst::Ret + pub(crate) fn ret(rets: Vec) -> Inst { + Inst::Ret { rets } } pub(crate) fn epilogue_placeholder() -> Inst { @@ -709,7 +708,7 @@ impl Inst { } pub(crate) fn jmp_unknown(target: RegMem) -> Inst { - target.assert_regclass_is(RegClass::I64); + target.assert_regclass_is(RegClass::Int); Inst::JmpUnknown { target } } @@ -726,9 +725,9 @@ impl Inst { to_reg: Writable, ext_kind: ExtKind, ) -> Inst { - let rc = to_reg.to_reg().get_class(); + let rc = to_reg.to_reg().class(); match rc { - RegClass::I64 => { + RegClass::Int => { let ext_mode = match ty.bytes() { 1 => Some(ExtMode::BQ), 2 => Some(ExtMode::WQ), @@ -755,7 +754,7 @@ impl Inst { Inst::mov64_m_r(from_addr, to_reg) } } - RegClass::V128 => { + RegClass::Float => { let opcode = match ty { types::F32 => SseOpcode::Movss, types::F64 => SseOpcode::Movsd, @@ -766,16 +765,15 @@ impl Inst { }; Inst::xmm_unary_rm_r(opcode, RegMem::mem(from_addr), to_reg) } - _ => panic!("unable to generate load for register class: {:?}", rc), } } /// Choose which instruction to use for storing a register value to memory. 
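/// Dispatches on the class of `from_reg`: integer registers use a plain
/// `mov` of the type's width, while float/vector registers use the SSE move
/// appropriate to the type (`movss`/`movsd` for scalar floats, a full
/// 128-bit move for vectors).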
pub(crate) fn store(ty: Type, from_reg: Reg, to_addr: impl Into) -> Inst { - let rc = from_reg.get_class(); + let rc = from_reg.class(); match rc { - RegClass::I64 => Inst::mov_r_m(OperandSize::from_ty(ty), from_reg, to_addr), - RegClass::V128 => { + RegClass::Int => Inst::mov_r_m(OperandSize::from_ty(ty), from_reg, to_addr), + RegClass::Float => { let opcode = match ty { types::F32 => SseOpcode::Movss, types::F64 => SseOpcode::Movsd, @@ -786,7 +784,6 @@ impl Inst { }; Inst::xmm_mov_r_m(opcode, from_reg, to_addr) } - _ => panic!("unable to generate store for register class: {:?}", rc), } } } @@ -831,342 +828,13 @@ impl Inst { _ => false, } } - - /// Translate three-operand instructions into a sequence of two-operand - /// instructions. - /// - /// For example: - /// - /// ```text - /// x = add a, b - /// ``` - /// - /// Becomes: - /// - /// ```text - /// mov x, a - /// add x, b - /// ``` - /// - /// The three-operand form for instructions allows our ISLE DSL code to have - /// a value-based, SSA view of the world. This method is responsible for - /// undoing that. - /// - /// Note that register allocation cleans up most of these inserted `mov`s - /// with its move coalescing. - pub(crate) fn mov_mitosis(mut self) -> impl Iterator { - log::trace!("mov_mitosis({:?})", self); - - let mut insts = SmallVec::<[Self; 4]>::new(); - - match &mut self { - Inst::AluRmiR { src1, dst, .. } => { - if *src1 != dst.to_reg() { - debug_assert!(src1.is_virtual()); - insts.push(Self::gen_move( - dst.to_writable_reg(), - src1.to_reg(), - types::I64, - )); - *src1 = dst.to_reg(); - } - insts.push(self); - } - Inst::XmmRmiReg { src1, dst, .. } => { - if *src1 != dst.to_reg() { - debug_assert!(src1.is_virtual()); - insts.push(Self::gen_move( - dst.to_writable_reg(), - src1.to_reg(), - types::I8X16, - )); - *src1 = dst.to_reg(); - } - insts.push(self); - } - Inst::XmmRmR { src1, dst, .. } => { - if *src1 != dst.to_reg() { - debug_assert!(src1.is_virtual()); - insts.push(Self::gen_move( - dst.to_writable_reg(), - src1.to_reg(), - types::I8X16, - )); - *src1 = dst.to_reg(); - } - insts.push(self); - } - Inst::XmmRmRImm { src1, dst, .. } => { - if *src1 != dst.to_reg() { - debug_assert!(src1.is_virtual()); - insts.push(Self::gen_move(*dst, *src1, types::I8X16)); - *src1 = dst.to_reg(); - } - insts.push(self); - } - Inst::XmmMinMaxSeq { rhs, dst, .. } => { - if *rhs != dst.to_reg() { - debug_assert!(rhs.is_virtual()); - insts.push(Self::gen_move( - dst.to_writable_reg(), - rhs.to_reg(), - types::I8X16, - )); - *rhs = dst.to_reg(); - } - insts.push(self); - } - Inst::Cmove { - size, - alternative, - dst, - .. - } => { - if *alternative != dst.to_reg() { - debug_assert!(alternative.is_virtual()); - insts.push(Self::mov_r_r( - *size, - alternative.to_reg(), - dst.to_writable_reg(), - )); - *alternative = dst.to_reg(); - } - insts.push(self); - } - Inst::XmmCmove { - alternative, dst, .. - } => { - if *alternative != dst.to_reg() { - debug_assert!(alternative.is_virtual()); - insts.push(Self::gen_move( - dst.to_writable_reg(), - alternative.to_reg(), - types::F32X4, - )); - *alternative = dst.to_reg(); - } - insts.push(self); - } - Inst::Not { src, dst, .. } | Inst::Neg { src, dst, .. } => { - if *src != dst.to_reg() { - debug_assert!(src.is_virtual()); - insts.push(Self::gen_move( - dst.to_writable_reg(), - src.to_reg(), - types::I64, - )); - *src = dst.to_reg(); - } - insts.push(self); - } - Inst::Div { - dividend, - dst_quotient, - dst_remainder, - .. 
- } - | Inst::CheckedDivOrRemSeq { - dividend, - dst_quotient, - dst_remainder, - .. - } => { - if *dividend != regs::rax() { - debug_assert!(dividend.is_virtual()); - insts.push(Self::gen_move( - Writable::from_reg(regs::rax()), - dividend.to_reg(), - types::I64, - )); - *dividend = Gpr::new(regs::rax()).unwrap(); - } - let mut quotient_mov = None; - if dst_quotient.to_reg() != regs::rax() { - debug_assert!(dst_quotient.to_reg().is_virtual()); - quotient_mov = Some(Self::gen_move( - dst_quotient.to_writable_reg(), - regs::rax(), - types::I64, - )); - *dst_quotient = Writable::from_reg(Gpr::new(regs::rax()).unwrap()); - } - let mut remainder_mov = None; - if dst_remainder.to_reg() != regs::rdx() { - debug_assert!(dst_remainder.to_reg().is_virtual()); - remainder_mov = Some(Self::gen_move( - dst_remainder.to_writable_reg(), - regs::rdx(), - types::I64, - )); - *dst_remainder = Writable::from_reg(Gpr::new(regs::rdx()).unwrap()); - } - insts.push(self); - insts.extend(quotient_mov); - insts.extend(remainder_mov); - } - Inst::MulHi { - src1, - dst_lo, - dst_hi, - .. - } => { - if *src1 != regs::rax() { - debug_assert!(src1.is_virtual()); - insts.push(Self::gen_move( - Writable::from_reg(regs::rax()), - src1.to_reg(), - types::I64, - )); - *src1 = Gpr::new(regs::rax()).unwrap(); - } - let mut dst_lo_mov = None; - if dst_lo.to_reg() != regs::rax() { - debug_assert!(dst_lo.to_reg().is_virtual()); - dst_lo_mov = Some(Self::gen_move( - dst_lo.to_writable_reg(), - regs::rax(), - types::I64, - )); - *dst_lo = Writable::from_reg(Gpr::new(regs::rax()).unwrap()); - } - let mut dst_hi_mov = None; - if dst_hi.to_reg() != regs::rdx() { - debug_assert!(dst_hi.to_reg().is_virtual()); - dst_hi_mov = Some(Self::gen_move( - dst_hi.to_writable_reg(), - regs::rdx(), - types::I64, - )); - *dst_hi = Writable::from_reg(Gpr::new(regs::rdx()).unwrap()); - } - insts.push(self); - insts.extend(dst_lo_mov); - insts.extend(dst_hi_mov); - } - Inst::SignExtendData { src, dst, .. } => { - if *src != regs::rax() { - debug_assert!(src.is_virtual()); - insts.push(Self::gen_move( - Writable::from_reg(regs::rax()), - src.to_reg(), - types::I64, - )); - *src = Gpr::new(regs::rax()).unwrap(); - } - let mut dst_mov = None; - if dst.to_reg() != regs::rax() { - debug_assert!(dst.to_reg().is_virtual()); - dst_mov = Some(Self::gen_move( - dst.to_writable_reg(), - dst.to_reg().to_reg(), - types::I64, - )); - *dst = Writable::from_reg(Gpr::new(regs::rax()).unwrap()); - } - insts.push(self); - insts.extend(dst_mov); - } - Inst::ShiftR { - src, num_bits, dst, .. - } => { - if *src != dst.to_reg() { - debug_assert!(src.is_virtual()); - insts.push(Self::gen_move( - dst.to_writable_reg(), - src.to_reg(), - types::I64, - )); - *src = dst.to_reg(); - } - if let Imm8Reg::Reg { reg } = num_bits.clone().to_imm8_reg() { - if reg != regs::rcx() { - debug_assert!(reg.is_virtual()); - insts.push(Self::gen_move( - Writable::from_reg(regs::rcx()), - reg, - types::I64, - )); - *num_bits = Imm8Gpr::new(Imm8Reg::Reg { reg: regs::rcx() }).unwrap(); - } - } - insts.push(self); - } - Inst::LockCmpxchg { - ty, - expected, - dst_old, - .. 
- } => { - if *expected != regs::rax() { - debug_assert!(expected.is_virtual()); - insts.push(Self::gen_move( - Writable::from_reg(regs::rax()), - *expected, - *ty, - )); - } - let mut dst_old_mov = None; - if dst_old.to_reg() != regs::rax() { - debug_assert!(dst_old.to_reg().is_virtual()); - dst_old_mov = Some(Self::gen_move(*dst_old, regs::rax(), *ty)); - *dst_old = Writable::from_reg(regs::rax()); - } - insts.push(self); - insts.extend(dst_old_mov); - } - Inst::AtomicRmwSeq { - ty, - address, - operand, - dst_old, - .. - } => { - if *address != regs::r9() { - debug_assert!(address.is_virtual()); - insts.push(Self::gen_move( - Writable::from_reg(regs::r9()), - *address, - types::I64, - )); - *address = regs::r9(); - } - if *operand != regs::r10() { - debug_assert!(operand.is_virtual()); - insts.push(Self::gen_move( - Writable::from_reg(regs::r10()), - *operand, - *ty, - )); - *address = regs::r10(); - } - let mut dst_old_mov = None; - if dst_old.to_reg() != regs::rax() { - debug_assert!(dst_old.to_reg().is_virtual()); - dst_old_mov = Some(Self::gen_move(*dst_old, regs::rax(), *ty)); - *dst_old = Writable::from_reg(regs::rax()); - } - insts.push(self); - insts.extend(dst_old_mov); - } - // No other instruction needs 3-operand to 2-operand legalization. - _ => insts.push(self), - } - - if log::log_enabled!(log::Level::Trace) { - for inst in &insts { - log::trace!(" -> {:?}", inst); - } - } - - insts.into_iter() - } } //============================================================================= // Instructions: printing impl PrettyPrint for Inst { - fn show_rru(&self, mb_rru: Option<&RealRegUniverse>) -> String { + fn pretty_print(&self, _size: u8, allocs: &mut AllocationConsumer<'_>) -> String { fn ljustify(s: String) -> String { let w = 7; if s.len() >= w { @@ -1219,116 +887,214 @@ impl PrettyPrint for Inst { match self { Inst::Nop { len } => format!("{} len={}", ljustify("nop".to_string()), len), + Inst::AluRmiR { size, op, dst, .. 
} if self.produces_const() => { + let dst = + pretty_print_reg(dst.to_reg().to_reg(), size_lqb(*size, op.is_8bit()), allocs); + format!( + "{} {}, {}, {}", + ljustify2(op.to_string(), suffix_lqb(*size, op.is_8bit())), + dst, + dst, + dst + ) + } Inst::AluRmiR { size, op, - src1: _, + src1, src2, dst, - } => format!( - "{} {}, {}", - ljustify2(op.to_string(), suffix_lqb(*size, op.is_8bit())), - src2.show_rru_sized(mb_rru, size_lqb(*size, op.is_8bit())), - show_ireg_sized(dst.to_reg().to_reg(), mb_rru, size_lqb(*size, op.is_8bit())), - ), + } => { + let size_bytes = size_lqb(*size, op.is_8bit()); + let src1 = pretty_print_reg(src1.to_reg(), size_bytes, allocs); + let dst = pretty_print_reg(dst.to_reg().to_reg(), size_bytes, allocs); + let src2 = src2.pretty_print(size_bytes, allocs); + format!( + "{} {}, {}, {}", + ljustify2(op.to_string(), suffix_lqb(*size, op.is_8bit())), + src1, + src2, + dst + ) + } + Inst::UnaryRmR { src, dst, op, size } => { + let dst = pretty_print_reg(dst.to_reg().to_reg(), size.to_bytes(), allocs); + let src = src.pretty_print(size.to_bytes(), allocs); + format!( + "{} {}, {}", + ljustify2(op.to_string(), suffix_bwlq(*size)), + src, + dst, + ) + } - Inst::UnaryRmR { src, dst, op, size } => format!( - "{} {}, {}", - ljustify2(op.to_string(), suffix_bwlq(*size)), - src.show_rru_sized(mb_rru, size.to_bytes()), - show_ireg_sized(dst.to_reg().to_reg(), mb_rru, size.to_bytes()), - ), + Inst::Not { size, src, dst } => { + let src = pretty_print_reg(src.to_reg(), size.to_bytes(), allocs); + let dst = pretty_print_reg(dst.to_reg().to_reg(), size.to_bytes(), allocs); + format!( + "{} {}, {}", + ljustify2("not".to_string(), suffix_bwlq(*size)), + src, + dst, + ) + } - Inst::Not { size, src: _, dst } => format!( - "{} {}", - ljustify2("not".to_string(), suffix_bwlq(*size)), - show_ireg_sized(dst.to_reg().to_reg(), mb_rru, size.to_bytes()) - ), - - Inst::Neg { size, src: _, dst } => format!( - "{} {}", - ljustify2("neg".to_string(), suffix_bwlq(*size)), - show_ireg_sized(dst.to_reg().to_reg(), mb_rru, size.to_bytes()) - ), + Inst::Neg { size, src, dst } => { + let src = pretty_print_reg(src.to_reg(), size.to_bytes(), allocs); + let dst = pretty_print_reg(dst.to_reg().to_reg(), size.to_bytes(), allocs); + format!( + "{} {}, {}", + ljustify2("neg".to_string(), suffix_bwlq(*size)), + src, + dst, + ) + } Inst::Div { size, signed, divisor, - .. - } => format!( - "{} {}", - ljustify(if *signed { - "idiv".to_string() - } else { - "div".into() - }), - divisor.show_rru_sized(mb_rru, size.to_bytes()) - ), + dividend_lo, + dividend_hi, + dst_quotient, + dst_remainder, + } => { + let dividend_lo = pretty_print_reg(dividend_lo.to_reg(), size.to_bytes(), allocs); + let dividend_hi = pretty_print_reg(dividend_hi.to_reg(), size.to_bytes(), allocs); + let dst_quotient = + pretty_print_reg(dst_quotient.to_reg().to_reg(), size.to_bytes(), allocs); + let dst_remainder = + pretty_print_reg(dst_remainder.to_reg().to_reg(), size.to_bytes(), allocs); + let divisor = divisor.pretty_print(size.to_bytes(), allocs); + format!( + "{} {}, {}, {}, {}, {}", + ljustify(if *signed { + "idiv".to_string() + } else { + "div".into() + }), + dividend_lo, + dividend_hi, + divisor, + dst_quotient, + dst_remainder, + ) + } Inst::MulHi { - size, signed, src2, .. 
- } => format!( - "{} {}", - ljustify(if *signed { - "imul".to_string() - } else { - "mul".to_string() - }), - src2.show_rru_sized(mb_rru, size.to_bytes()) - ), + size, + signed, + src1, + src2, + dst_lo, + dst_hi, + } => { + let src1 = pretty_print_reg(src1.to_reg(), size.to_bytes(), allocs); + let dst_lo = pretty_print_reg(dst_lo.to_reg().to_reg(), size.to_bytes(), allocs); + let dst_hi = pretty_print_reg(dst_hi.to_reg().to_reg(), size.to_bytes(), allocs); + let src2 = src2.pretty_print(size.to_bytes(), allocs); + format!( + "{} {}, {}, {}, {}", + ljustify(if *signed { + "imul".to_string() + } else { + "mul".to_string() + }), + src1, + src2, + dst_lo, + dst_hi, + ) + } Inst::CheckedDivOrRemSeq { kind, size, divisor, - .. - } => format!( - "{} $rax:$rdx, {}", - match kind { - DivOrRemKind::SignedDiv => "sdiv", - DivOrRemKind::UnsignedDiv => "udiv", - DivOrRemKind::SignedRem => "srem", - DivOrRemKind::UnsignedRem => "urem", - }, - show_ireg_sized(divisor.to_reg().to_reg(), mb_rru, size.to_bytes()), - ), - - Inst::SignExtendData { size, .. } => match size { - OperandSize::Size8 => "cbw", - OperandSize::Size16 => "cwd", - OperandSize::Size32 => "cdq", - OperandSize::Size64 => "cqo", + dividend_lo, + dividend_hi, + dst_quotient, + dst_remainder, + tmp, + } => { + let dividend_lo = pretty_print_reg(dividend_lo.to_reg(), size.to_bytes(), allocs); + let dividend_hi = pretty_print_reg(dividend_hi.to_reg(), size.to_bytes(), allocs); + let divisor = pretty_print_reg(divisor.to_reg().to_reg(), size.to_bytes(), allocs); + let dst_quotient = + pretty_print_reg(dst_quotient.to_reg().to_reg(), size.to_bytes(), allocs); + let dst_remainder = + pretty_print_reg(dst_remainder.to_reg().to_reg(), size.to_bytes(), allocs); + let tmp = tmp + .map(|tmp| pretty_print_reg(tmp.to_reg().to_reg(), size.to_bytes(), allocs)) + .unwrap_or("(none)".to_string()); + format!( + "{} {}, {}, {}, {}, {}, tmp={}", + match kind { + DivOrRemKind::SignedDiv => "sdiv_seq", + DivOrRemKind::UnsignedDiv => "udiv_seq", + DivOrRemKind::SignedRem => "srem_seq", + DivOrRemKind::UnsignedRem => "urem_seq", + }, + dividend_lo, + dividend_hi, + divisor, + dst_quotient, + dst_remainder, + tmp, + ) } - .into(), - Inst::XmmUnaryRmR { op, src, dst, .. } => format!( - "{} {}, {}", - ljustify(op.to_string()), - src.show_rru_sized(mb_rru, op.src_size()), - show_ireg_sized(dst.to_reg().to_reg(), mb_rru, 8), - ), + Inst::SignExtendData { size, src, dst } => { + let src = pretty_print_reg(src.to_reg(), size.to_bytes(), allocs); + let dst = pretty_print_reg(dst.to_reg().to_reg(), size.to_bytes(), allocs); + format!( + "{} {}, {}", + match size { + OperandSize::Size8 => "cbw", + OperandSize::Size16 => "cwd", + OperandSize::Size32 => "cdq", + OperandSize::Size64 => "cqo", + }, + src, + dst, + ) + } - Inst::XmmUnaryRmREvex { op, src, dst, .. } => format!( - "{} {}, {}", - ljustify(op.to_string()), - src.show_rru_sized(mb_rru, 8), - show_ireg_sized(dst.to_reg().to_reg(), mb_rru, 8), - ), + Inst::XmmUnaryRmR { op, src, dst, .. } => { + let dst = pretty_print_reg(dst.to_reg().to_reg(), op.src_size(), allocs); + let src = src.pretty_print(op.src_size(), allocs); + format!("{} {}, {}", ljustify(op.to_string()), src, dst) + } - Inst::XmmMovRM { op, src, dst, .. } => format!( - "{} {}, {}", - ljustify(op.to_string()), - show_ireg_sized(*src, mb_rru, 8), - dst.show_rru(mb_rru), - ), + Inst::XmmUnaryRmREvex { op, src, dst, .. 
} => { + let dst = pretty_print_reg(dst.to_reg().to_reg(), 8, allocs); + let src = src.pretty_print(8, allocs); + format!("{} {}, {}", ljustify(op.to_string()), src, dst) + } - Inst::XmmRmR { op, src2, dst, .. } => format!( - "{} {}, {}", - ljustify(op.to_string()), - src2.show_rru_sized(mb_rru, 8), - show_ireg_sized(dst.to_reg().to_reg(), mb_rru, 8), - ), + Inst::XmmMovRM { op, src, dst, .. } => { + let src = pretty_print_reg(*src, 8, allocs); + let dst = dst.pretty_print(8, allocs); + format!("{} {}, {}", ljustify(op.to_string()), src, dst) + } + + Inst::XmmRmR { op, dst, .. } if self.produces_const() => { + let dst = pretty_print_reg(dst.to_reg().to_reg(), 8, allocs); + format!("{} {}, {}, {}", ljustify(op.to_string()), dst, dst, dst) + } + + Inst::XmmRmR { + op, + src1, + src2, + dst, + .. + } => { + let src1 = pretty_print_reg(src1.to_reg(), 8, allocs); + let src2 = src2.pretty_print(8, allocs); + let dst = pretty_print_reg(dst.to_reg().to_reg(), 8, allocs); + format!("{} {}, {}, {}", ljustify(op.to_string()), src1, src2, dst) + } Inst::XmmRmREvex { op, @@ -1336,13 +1102,12 @@ impl PrettyPrint for Inst { src2, dst, .. - } => format!( - "{} {}, {}, {}", - ljustify(op.to_string()), - src1.show_rru_sized(mb_rru, 8), - show_ireg_sized(src2.to_reg(), mb_rru, 8), - show_ireg_sized(dst.to_reg().to_reg(), mb_rru, 8), - ), + } => { + let src2 = pretty_print_reg(src2.to_reg(), 8, allocs); + let dst = pretty_print_reg(dst.to_reg().to_reg(), 8, allocs); + let src1 = src1.pretty_print(8, allocs); + format!("{} {}, {}, {}", ljustify(op.to_string()), src1, src2, dst) + } Inst::XmmMinMaxSeq { lhs, @@ -1350,50 +1115,90 @@ impl PrettyPrint for Inst { dst, is_min, size, - } => format!( - "{} {}, {}, {}", - ljustify2( - if *is_min { - "xmm min seq ".to_string() - } else { - "xmm max seq ".to_string() - }, - format!("f{}", size.to_bits()) - ), - show_ireg_sized(lhs.to_reg(), mb_rru, 8), - show_ireg_sized(rhs.to_reg(), mb_rru, 8), - show_ireg_sized(dst.to_reg().to_reg(), mb_rru, 8), - ), + } => { + let rhs = pretty_print_reg(rhs.to_reg(), 8, allocs); + let lhs = pretty_print_reg(lhs.to_reg(), 8, allocs); + let dst = pretty_print_reg(dst.to_reg().to_reg(), 8, allocs); + format!( + "{} {}, {}, {}", + ljustify2( + if *is_min { + "xmm min seq ".to_string() + } else { + "xmm max seq ".to_string() + }, + format!("f{}", size.to_bits()) + ), + lhs, + rhs, + dst + ) + } + + Inst::XmmRmRImm { + op, dst, imm, size, .. + } if self.produces_const() => { + let dst = pretty_print_reg(dst.to_reg(), 8, allocs); + format!( + "{} ${}, {}, {}, {}", + ljustify(format!( + "{}{}", + op.to_string(), + if *size == OperandSize::Size64 { + ".w" + } else { + "" + } + )), + imm, + dst, + dst, + dst, + ) + } Inst::XmmRmRImm { op, + src1, src2, dst, imm, size, .. 
- } => format!( - "{} ${}, {}, {}", - ljustify(format!( - "{}{}", - op.to_string(), - if *size == OperandSize::Size64 { - ".w" - } else { - "" - } - )), - imm, - src2.show_rru(mb_rru), - dst.show_rru(mb_rru), - ), + } => { + let src1 = if op.uses_src1() { + pretty_print_reg(*src1, 8, allocs) + ", " + } else { + "".into() + }; + let dst = pretty_print_reg(dst.to_reg(), 8, allocs); + let src2 = src2.pretty_print(8, allocs); + format!( + "{} ${}, {}{}, {}", + ljustify(format!( + "{}{}", + op.to_string(), + if *size == OperandSize::Size64 { + ".w" + } else { + "" + } + )), + imm, + src1, + src2, + dst, + ) + } Inst::XmmUninitializedValue { dst } => { - format!("{} {}", ljustify("uninit".into()), dst.show_rru(mb_rru),) + let dst = pretty_print_reg(dst.to_reg().to_reg(), 8, allocs); + format!("{} {}", ljustify("uninit".into()), dst) } Inst::XmmLoadConst { src, dst, .. } => { - format!("load_const {:?}, {}", src, dst.show_rru(mb_rru),) + let dst = pretty_print_reg(dst.to_reg(), 8, allocs); + format!("load_const {:?}, {}", src, dst) } Inst::XmmToGpr { @@ -1403,12 +1208,9 @@ impl PrettyPrint for Inst { dst_size, } => { let dst_size = dst_size.to_bytes(); - format!( - "{} {}, {}", - ljustify(op.to_string()), - src.show_rru(mb_rru), - show_ireg_sized(dst.to_reg().to_reg(), mb_rru, dst_size), - ) + let src = pretty_print_reg(src.to_reg(), 8, allocs); + let dst = pretty_print_reg(dst.to_reg().to_reg(), dst_size, allocs); + format!("{} {}, {}", ljustify(op.to_string()), src, dst) } Inst::GprToXmm { @@ -1416,179 +1218,243 @@ impl PrettyPrint for Inst { src, src_size, dst, - } => format!( - "{} {}, {}", - ljustify(op.to_string()), - src.show_rru_sized(mb_rru, src_size.to_bytes()), - dst.show_rru(mb_rru) - ), + } => { + let dst = pretty_print_reg(dst.to_reg().to_reg(), 8, allocs); + let src = src.pretty_print(src_size.to_bytes(), allocs); + format!("{} {}, {}", ljustify(op.to_string()), src, dst) + } - Inst::XmmCmpRmR { op, src, dst } => format!( - "{} {}, {}", - ljustify(op.to_string()), - src.show_rru_sized(mb_rru, 8), - show_ireg_sized(dst.to_reg(), mb_rru, 8), - ), + Inst::XmmCmpRmR { op, src, dst } => { + let dst = pretty_print_reg(dst.to_reg(), 8, allocs); + let src = src.pretty_print(8, allocs); + format!("{} {}, {}", ljustify(op.to_string()), src, dst) + } Inst::CvtUint64ToFloatSeq { - src, dst, dst_size, .. - } => format!( - "{} {}, {}", - ljustify(format!( - "u64_to_{}_seq", - if *dst_size == OperandSize::Size64 { - "f64" - } else { - "f32" - } - )), - show_ireg_sized(src.to_reg().to_reg(), mb_rru, 8), - dst.show_rru(mb_rru), - ), + src, + dst, + dst_size, + tmp_gpr1, + tmp_gpr2, + .. + } => { + let src = pretty_print_reg(src.to_reg().to_reg(), 8, allocs); + let dst = pretty_print_reg(dst.to_reg().to_reg(), dst_size.to_bytes(), allocs); + let tmp_gpr1 = pretty_print_reg(tmp_gpr1.to_reg().to_reg(), 8, allocs); + let tmp_gpr2 = pretty_print_reg(tmp_gpr2.to_reg().to_reg(), 8, allocs); + format!( + "{} {}, {}, {}, {}", + ljustify(format!( + "u64_to_{}_seq", + if *dst_size == OperandSize::Size64 { + "f64" + } else { + "f32" + } + )), + src, + dst, + tmp_gpr1, + tmp_gpr2 + ) + } Inst::CvtFloatToSintSeq { src, dst, src_size, dst_size, + tmp_xmm, + tmp_gpr, .. 
- } => format!( - "{} {}, {}", - ljustify(format!( - "cvt_float{}_to_sint{}_seq", - src_size.to_bits(), - dst_size.to_bits() - )), - show_ireg_sized(src.to_reg().to_reg(), mb_rru, 8), - show_ireg_sized(dst.to_reg().to_reg(), mb_rru, dst_size.to_bytes()), - ), + } => { + let src = pretty_print_reg(src.to_reg().to_reg(), src_size.to_bytes(), allocs); + let dst = pretty_print_reg(dst.to_reg().to_reg(), dst_size.to_bytes(), allocs); + let tmp_gpr = pretty_print_reg(tmp_gpr.to_reg().to_reg(), 8, allocs); + let tmp_xmm = pretty_print_reg(tmp_xmm.to_reg().to_reg(), 8, allocs); + format!( + "{} {}, {}, {}, {}", + ljustify(format!( + "cvt_float{}_to_sint{}_seq", + src_size.to_bits(), + dst_size.to_bits() + )), + src, + dst, + tmp_gpr, + tmp_xmm, + ) + } Inst::CvtFloatToUintSeq { src, dst, src_size, dst_size, + tmp_gpr, + tmp_xmm, .. - } => format!( - "{} {}, {}", - ljustify(format!( - "cvt_float{}_to_uint{}_seq", - src_size.to_bits(), - dst_size.to_bits() - )), - show_ireg_sized(src.to_reg().to_reg(), mb_rru, 8), - show_ireg_sized(dst.to_reg().to_reg(), mb_rru, dst_size.to_bytes()), - ), + } => { + let src = pretty_print_reg(src.to_reg().to_reg(), src_size.to_bytes(), allocs); + let dst = pretty_print_reg(dst.to_reg().to_reg(), dst_size.to_bytes(), allocs); + let tmp_gpr = pretty_print_reg(tmp_gpr.to_reg().to_reg(), 8, allocs); + let tmp_xmm = pretty_print_reg(tmp_xmm.to_reg().to_reg(), 8, allocs); + format!( + "{} {}, {}, {}, {}", + ljustify(format!( + "cvt_float{}_to_uint{}_seq", + src_size.to_bits(), + dst_size.to_bits() + )), + src, + dst, + tmp_gpr, + tmp_xmm, + ) + } Inst::Imm { dst_size, simm64, dst, } => { + let dst = pretty_print_reg(dst.to_reg().to_reg(), dst_size.to_bytes(), allocs); if *dst_size == OperandSize::Size64 { format!( "{} ${}, {}", ljustify("movabsq".to_string()), *simm64 as i64, - show_ireg_sized(dst.to_reg().to_reg(), mb_rru, 8) + dst, ) } else { format!( "{} ${}, {}", ljustify("movl".to_string()), (*simm64 as u32) as i32, - show_ireg_sized(dst.to_reg().to_reg(), mb_rru, 4) + dst, ) } } - Inst::MovRR { size, src, dst } => format!( - "{} {}, {}", - ljustify2("mov".to_string(), suffix_lq(*size)), - show_ireg_sized(src.to_reg(), mb_rru, size.to_bytes()), - show_ireg_sized(dst.to_reg().to_reg(), mb_rru, size.to_bytes()) - ), + Inst::MovRR { size, src, dst } => { + let src = pretty_print_reg(src.to_reg(), size.to_bytes(), allocs); + let dst = pretty_print_reg(dst.to_reg().to_reg(), size.to_bytes(), allocs); + format!( + "{} {}, {}", + ljustify2("mov".to_string(), suffix_lq(*size)), + src, + dst + ) + } Inst::MovzxRmR { ext_mode, src, dst, .. } => { + let dst_size = if *ext_mode == ExtMode::LQ { + 4 + } else { + ext_mode.dst_size() + }; + let dst = pretty_print_reg(dst.to_reg().to_reg(), dst_size, allocs); + let src = src.pretty_print(ext_mode.src_size(), allocs); if *ext_mode == ExtMode::LQ { - format!( - "{} {}, {}", - ljustify("movl".to_string()), - src.show_rru_sized(mb_rru, ext_mode.src_size()), - show_ireg_sized(dst.to_reg().to_reg(), mb_rru, 4) - ) + format!("{} {}, {}", ljustify("movl".to_string()), src, dst) } else { format!( "{} {}, {}", ljustify2("movz".to_string(), ext_mode.to_string()), - src.show_rru_sized(mb_rru, ext_mode.src_size()), - show_ireg_sized(dst.to_reg().to_reg(), mb_rru, ext_mode.dst_size()) + src, + dst, ) } } - Inst::Mov64MR { src, dst, .. } => format!( - "{} {}, {}", - ljustify("movq".to_string()), - src.show_rru(mb_rru), - dst.show_rru(mb_rru) - ), + Inst::Mov64MR { src, dst, .. 
} => { + let dst = pretty_print_reg(dst.to_reg().to_reg(), 8, allocs); + let src = src.pretty_print(8, allocs); + format!("{} {}, {}", ljustify("movq".to_string()), src, dst) + } - Inst::LoadEffectiveAddress { addr, dst } => format!( - "{} {}, {}", - ljustify("lea".to_string()), - addr.show_rru(mb_rru), - dst.show_rru(mb_rru) - ), + Inst::LoadEffectiveAddress { addr, dst } => { + let dst = pretty_print_reg(dst.to_reg().to_reg(), 8, allocs); + let addr = addr.pretty_print(8, allocs); + format!("{} {}, {}", ljustify("lea".to_string()), addr, dst) + } Inst::MovsxRmR { ext_mode, src, dst, .. - } => format!( - "{} {}, {}", - ljustify2("movs".to_string(), ext_mode.to_string()), - src.show_rru_sized(mb_rru, ext_mode.src_size()), - show_ireg_sized(dst.to_reg().to_reg(), mb_rru, ext_mode.dst_size()) - ), + } => { + let dst = pretty_print_reg(dst.to_reg().to_reg(), ext_mode.dst_size(), allocs); + let src = src.pretty_print(ext_mode.src_size(), allocs); + format!( + "{} {}, {}", + ljustify2("movs".to_string(), ext_mode.to_string()), + src, + dst + ) + } - Inst::MovRM { size, src, dst, .. } => format!( - "{} {}, {}", - ljustify2("mov".to_string(), suffix_bwlq(*size)), - show_ireg_sized(src.to_reg(), mb_rru, size.to_bytes()), - dst.show_rru(mb_rru) - ), + Inst::MovRM { size, src, dst, .. } => { + let src = pretty_print_reg(src.to_reg(), size.to_bytes(), allocs); + let dst = dst.pretty_print(size.to_bytes(), allocs); + format!( + "{} {}, {}", + ljustify2("mov".to_string(), suffix_bwlq(*size)), + src, + dst + ) + } Inst::ShiftR { size, kind, num_bits, + src, dst, .. - } => match num_bits.clone().to_imm8_reg() { - Imm8Reg::Reg { reg } => format!( - "{} {}, {}", - ljustify2(kind.to_string(), suffix_bwlq(*size)), - show_ireg_sized(reg, mb_rru, 1), - show_ireg_sized(dst.to_reg().to_reg(), mb_rru, size.to_bytes()) - ), + } => { + let src = pretty_print_reg(src.to_reg(), size.to_bytes(), allocs); + let dst = pretty_print_reg(dst.to_reg().to_reg(), size.to_bytes(), allocs); + match num_bits.clone().to_imm8_reg() { + Imm8Reg::Reg { reg } => { + let reg = pretty_print_reg(reg, 1, allocs); + format!( + "{} {}, {}, {}", + ljustify2(kind.to_string(), suffix_bwlq(*size)), + reg, + src, + dst, + ) + } - Imm8Reg::Imm8 { imm: num_bits } => format!( - "{} ${}, {}", - ljustify2(kind.to_string(), suffix_bwlq(*size)), - num_bits, - show_ireg_sized(dst.to_reg().to_reg(), mb_rru, size.to_bytes()) - ), - }, + Imm8Reg::Imm8 { imm: num_bits } => format!( + "{} ${}, {}, {}", + ljustify2(kind.to_string(), suffix_bwlq(*size)), + num_bits, + src, + dst, + ), + } + } Inst::XmmRmiReg { - opcode, src2, dst, .. - } => format!( - "{} {}, {}", - ljustify(opcode.to_string()), - src2.show_rru(mb_rru), - dst.to_reg().show_rru(mb_rru) - ), + opcode, + src1, + src2, + dst, + .. 
+ } => { + let src1 = pretty_print_reg(src1.to_reg(), 8, allocs); + let dst = pretty_print_reg(dst.to_reg().to_reg(), 8, allocs); + let src2 = src2.pretty_print(8, allocs); + format!( + "{} {}, {}, {}", + ljustify(opcode.to_string()), + src1, + src2, + dst, + ) + } Inst::CmpRmiR { size, @@ -1596,6 +1462,8 @@ impl PrettyPrint for Inst { dst, opcode, } => { + let dst = pretty_print_reg(dst.to_reg(), size.to_bytes(), allocs); + let src = src.pretty_print(size.to_bytes(), allocs); let op = match opcode { CmpOpcode::Cmp => "cmp", CmpOpcode::Test => "test", @@ -1603,67 +1471,79 @@ impl PrettyPrint for Inst { format!( "{} {}, {}", ljustify2(op.to_string(), suffix_bwlq(*size)), - src.show_rru_sized(mb_rru, size.to_bytes()), - show_ireg_sized(dst.to_reg(), mb_rru, size.to_bytes()) + src, + dst, ) } - Inst::Setcc { cc, dst } => format!( - "{} {}", - ljustify2("set".to_string(), cc.to_string()), - show_ireg_sized(dst.to_reg().to_reg(), mb_rru, 1) - ), + Inst::Setcc { cc, dst } => { + let dst = pretty_print_reg(dst.to_reg().to_reg(), 1, allocs); + format!("{} {}", ljustify2("set".to_string(), cc.to_string()), dst) + } Inst::Cmove { size, cc, - consequent: src, - alternative: _, + consequent, + alternative, dst, - } => format!( - "{} {}, {}", - ljustify(format!("cmov{}{}", cc.to_string(), suffix_bwlq(*size))), - src.show_rru_sized(mb_rru, size.to_bytes()), - show_ireg_sized(dst.to_reg().to_reg(), mb_rru, size.to_bytes()) - ), + } => { + let alternative = pretty_print_reg(alternative.to_reg(), size.to_bytes(), allocs); + let dst = pretty_print_reg(dst.to_reg().to_reg(), size.to_bytes(), allocs); + let consequent = consequent.pretty_print(size.to_bytes(), allocs); + format!( + "{} {}, {}, {}", + ljustify(format!("cmov{}{}", cc.to_string(), suffix_bwlq(*size))), + consequent, + alternative, + dst, + ) + } Inst::XmmCmove { size, cc, - consequent: src, + consequent, + alternative, dst, .. } => { + let alternative = pretty_print_reg(alternative.to_reg(), size.to_bytes(), allocs); + let dst = pretty_print_reg(dst.to_reg().to_reg(), size.to_bytes(), allocs); + let consequent = consequent.pretty_print(size.to_bytes(), allocs); format!( - "j{} $next; mov{} {}, {}; $next: ", + "mov {}, {}; j{} $next; mov{} {}, {}; $next: ", cc.invert().to_string(), if *size == OperandSize::Size64 { "sd" } else { "ss" }, - src.show_rru_sized(mb_rru, size.to_bytes()), - show_ireg_sized(dst.to_reg().to_reg(), mb_rru, size.to_bytes()) + consequent, + dst, + alternative, + dst, ) } Inst::Push64 { src } => { - format!("{} {}", ljustify("pushq".to_string()), src.show_rru(mb_rru)) + let src = src.pretty_print(8, allocs); + format!("{} {}", ljustify("pushq".to_string()), src) } Inst::Pop64 { dst } => { - format!("{} {}", ljustify("popq".to_string()), dst.show_rru(mb_rru)) + let dst = pretty_print_reg(dst.to_reg().to_reg(), 8, allocs); + format!("{} {}", ljustify("popq".to_string()), dst) } Inst::CallKnown { dest, .. } => format!("{} {:?}", ljustify("call".to_string()), dest), - Inst::CallUnknown { dest, .. } => format!( - "{} *{}", - ljustify("call".to_string()), - dest.show_rru(mb_rru) - ), + Inst::CallUnknown { dest, .. } => { + let dest = dest.pretty_print(8, allocs); + format!("{} *{}", ljustify("call".to_string()), dest) + } - Inst::Ret => "ret".to_string(), + Inst::Ret { .. } => "ret".to_string(), Inst::EpiloguePlaceholder => "epilogue placeholder".to_string(), @@ -1689,14 +1569,14 @@ impl PrettyPrint for Inst { ), Inst::JmpTableSeq { idx, .. 
             } => {
-                format!("{} {}", ljustify("br_table".into()), idx.show_rru(mb_rru))
+                let idx = pretty_print_reg(*idx, 8, allocs);
+                format!("{} {}", ljustify("br_table".into()), idx)
             }

-            Inst::JmpUnknown { target } => format!(
-                "{} *{}",
-                ljustify("jmp".to_string()),
-                target.show_rru(mb_rru)
-            ),
+            Inst::JmpUnknown { target } => {
+                let target = target.pretty_print(8, allocs);
+                format!("{} *{}", ljustify("jmp".to_string()), target)
+            }

             Inst::TrapIf { cc, trap_code, .. } => {
                 format!("j{} ; ud2 {} ;", cc.invert().to_string(), trap_code)
@@ -1704,26 +1584,37 @@
             Inst::LoadExtName {
                 dst, name, offset, ..
-            } => format!(
-                "{} {}+{}, {}",
-                ljustify("load_ext_name".into()),
-                name,
-                offset,
-                show_ireg_sized(dst.to_reg(), mb_rru, 8),
-            ),
+            } => {
+                let dst = pretty_print_reg(dst.to_reg(), 8, allocs);
+                format!(
+                    "{} {}+{}, {}",
+                    ljustify("load_ext_name".into()),
+                    name,
+                    offset,
+                    dst,
+                )
+            }

             Inst::LockCmpxchg {
                 ty,
                 replacement,
+                expected,
                 mem,
+                dst_old,
                 ..
             } => {
                 let size = ty.bytes() as u8;
+                let replacement = pretty_print_reg(*replacement, size, allocs);
+                let expected = pretty_print_reg(*expected, size, allocs);
+                let dst_old = pretty_print_reg(dst_old.to_reg(), size, allocs);
+                let mem = mem.pretty_print(size, allocs);
                 format!(
-                    "lock cmpxchg{} {}, {}",
+                    "lock cmpxchg{} {}, {}, expected={}, dst_old={}",
                     suffix_bwlq(OperandSize::from_bytes(size as u32)),
-                    show_ireg_sized(*replacement, mb_rru, size),
-                    mem.show_rru(mb_rru)
+                    replacement,
+                    mem,
+                    expected,
+                    dst_old,
                 )
             }

@@ -1753,71 +1644,74 @@
                 format!("macho_tls_get_addr {:?}", symbol)
             }

-            Inst::ValueLabelMarker { label, reg } => {
-                format!("value_label {:?}, {}", label, reg.show_rru(mb_rru))
-            }
-
             Inst::Unwind { inst } => {
                 format!("unwind {:?}", inst)
             }
+
+            Inst::DummyUse { reg } => {
+                let reg = pretty_print_reg(*reg, 8, allocs);
+                format!("dummy_use {}", reg)
+            }
         }
     }
 }

-// Temp hook for legacy printing machinery
 impl fmt::Debug for Inst {
     fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result {
-        // Print the insn without a Universe :-(
-        write!(fmt, "{}", self.show_rru(None))
+        write!(
+            fmt,
+            "{}",
+            self.pretty_print_inst(&[], &mut Default::default())
+        )
     }
 }

-fn x64_get_regs(inst: &Inst, collector: &mut RegUsageCollector) {
-    // This is a bit subtle. If some register is in the modified set, then it may not be in either
-    // the use or def sets. However, enforcing that directly is somewhat difficult. Instead,
-    // regalloc.rs will "fix" this for us by removing the modified set from the use and def
-    // sets.
+fn x64_get_operands<F: Fn(VReg) -> VReg>(inst: &Inst, collector: &mut OperandCollector<'_, F>) {
+    // FIXME: remove all remaining `mod` operands here to get to pure
+    // SSA.
+
+    // Note: because we need to statically know the indices of each
+    // reg in the operands list in order to fetch its allocation
+    // later, we put the variable-operand-count bits (the RegMem,
+    // RegMemImm, etc args) last. regalloc2 doesn't care what order
+    // the operands come in; they can be freely reordered.
+
+    // N.B.: we MUST keep the below in careful sync with (i) emission,
+    // in `emit.rs`, and (ii) pretty-printing, in the `pretty_print`
+    // method above.
     match inst {
         Inst::AluRmiR {
             src1, src2, dst, ..
         } => {
-            debug_assert_eq!(*src1, dst.to_reg());
             if inst.produces_const() {
-                // No need to account for src2, since src2 == dst.
- collector.add_def(dst.to_writable_reg()); + collector.reg_def(dst.to_writable_reg()); } else { - src2.get_regs_as_uses(collector); - collector.add_mod(dst.to_writable_reg()); + collector.reg_use(src1.to_reg()); + collector.reg_reuse_def(dst.to_writable_reg(), 0); + src2.get_operands(collector); } } Inst::Not { src, dst, .. } => { - debug_assert_eq!(*src, dst.to_reg()); - collector.add_mod(dst.to_writable_reg()); + collector.reg_use(src.to_reg()); + collector.reg_reuse_def(dst.to_writable_reg(), 0); } Inst::Neg { src, dst, .. } => { - debug_assert_eq!(*src, dst.to_reg()); - collector.add_mod(dst.to_writable_reg()); + collector.reg_use(src.to_reg()); + collector.reg_reuse_def(dst.to_writable_reg(), 0); } Inst::Div { - size, divisor, - dividend, + dividend_lo, + dividend_hi, dst_quotient, dst_remainder, .. } => { - debug_assert_eq!(*dividend, regs::rax()); - debug_assert_eq!(dst_quotient.to_reg(), regs::rax()); - collector.add_mod(Writable::from_reg(regs::rax())); - - debug_assert_eq!(dst_remainder.to_reg(), regs::rdx()); - if *size == OperandSize::Size8 { - collector.add_def(Writable::from_reg(regs::rdx())); - } else { - collector.add_mod(Writable::from_reg(regs::rdx())); - } - - divisor.get_regs_as_uses(collector); + collector.reg_fixed_use(dividend_lo.to_reg(), regs::rax()); + collector.reg_fixed_use(dividend_hi.to_reg(), regs::rdx()); + collector.reg_fixed_def(dst_quotient.to_writable_reg(), regs::rax()); + collector.reg_fixed_def(dst_remainder.to_writable_reg(), regs::rdx()); + divisor.get_operands(collector); } Inst::MulHi { src1, @@ -1826,54 +1720,52 @@ fn x64_get_regs(inst: &Inst, collector: &mut RegUsageCollector) { dst_hi, .. } => { - debug_assert_eq!(*src1, regs::rax()); - debug_assert_eq!(dst_lo.to_reg(), regs::rax()); - collector.add_mod(Writable::from_reg(regs::rax())); - - debug_assert_eq!(dst_hi.to_reg(), regs::rdx()); - collector.add_def(Writable::from_reg(regs::rdx())); - - src2.get_regs_as_uses(collector); + collector.reg_fixed_use(src1.to_reg(), regs::rax()); + collector.reg_fixed_def(dst_lo.to_writable_reg(), regs::rax()); + collector.reg_fixed_def(dst_hi.to_writable_reg(), regs::rdx()); + src2.get_operands(collector); } Inst::CheckedDivOrRemSeq { divisor, - dividend, + dividend_lo, + dividend_hi, dst_quotient, dst_remainder, tmp, .. } => { - debug_assert_eq!(*dividend, regs::rax()); - debug_assert_eq!(dst_quotient.to_reg(), regs::rax()); - debug_assert_eq!(dst_remainder.to_reg(), regs::rdx()); - // Mark both fixed registers as mods, to avoid an early clobber problem in codegen - // (i.e. the temporary is allocated one of the fixed registers). This requires writing - // the rdx register *before* the instruction, which is not too bad. 
- collector.add_mod(Writable::from_reg(regs::rax())); - collector.add_mod(Writable::from_reg(regs::rdx())); - collector.add_mod(divisor.to_writable_reg()); + collector.reg_fixed_use(dividend_lo.to_reg(), regs::rax()); + collector.reg_fixed_use(dividend_hi.to_reg(), regs::rdx()); + collector.reg_mod(divisor.to_writable_reg()); + collector.reg_fixed_def(dst_quotient.to_writable_reg(), regs::rax()); + collector.reg_fixed_def(dst_remainder.to_writable_reg(), regs::rdx()); if let Some(tmp) = tmp { - collector.add_def(tmp.to_writable_reg()); + collector.reg_early_def(tmp.to_writable_reg()); } } Inst::SignExtendData { size, src, dst } => { - debug_assert_eq!(*src, regs::rax()); - debug_assert_eq!(dst.to_reg(), regs::rdx()); match size { - OperandSize::Size8 => collector.add_mod(Writable::from_reg(regs::rax())), + OperandSize::Size8 => { + // Note `rax` on both src and dest: 8->16 extend + // does AL -> AX. + collector.reg_fixed_use(src.to_reg(), regs::rax()); + collector.reg_fixed_def(dst.to_writable_reg(), regs::rax()); + } _ => { - collector.add_use(regs::rax()); - collector.add_def(Writable::from_reg(regs::rdx())); + // All other widths do RAX -> RDX (AX -> DX:AX, + // EAX -> EDX:EAX). + collector.reg_fixed_use(src.to_reg(), regs::rax()); + collector.reg_fixed_def(dst.to_writable_reg(), regs::rdx()); } } } Inst::UnaryRmR { src, dst, .. } => { - src.clone().to_reg_mem().get_regs_as_uses(collector); - collector.add_def(dst.to_writable_reg()); + collector.reg_def(dst.to_writable_reg()); + src.get_operands(collector); } Inst::XmmUnaryRmR { src, dst, .. } | Inst::XmmUnaryRmREvex { src, dst, .. } => { - src.clone().to_reg_mem().get_regs_as_uses(collector); - collector.add_def(dst.to_writable_reg()); + collector.reg_def(dst.to_writable_reg()); + src.get_operands(collector); } Inst::XmmRmR { src1, @@ -1882,19 +1774,19 @@ fn x64_get_regs(inst: &Inst, collector: &mut RegUsageCollector) { op, .. } => { - debug_assert_eq!(*src1, dst.to_reg()); if inst.produces_const() { - // No need to account for src, since src == dst. - collector.add_def(dst.to_writable_reg()); + collector.reg_def(dst.to_writable_reg()); } else { - src2.clone().to_reg_mem().get_regs_as_uses(collector); - collector.add_mod(dst.to_writable_reg()); + collector.reg_use(src1.to_reg()); + collector.reg_reuse_def(dst.to_writable_reg(), 0); + src2.get_operands(collector); + // Some instructions have an implicit use of XMM0. if *op == SseOpcode::Blendvpd || *op == SseOpcode::Blendvps || *op == SseOpcode::Pblendvb { - collector.add_use(regs::xmm0()); + collector.reg_use(regs::xmm0()); } } } @@ -1905,12 +1797,12 @@ fn x64_get_regs(inst: &Inst, collector: &mut RegUsageCollector) { dst, .. } => { - src1.clone().to_reg_mem().get_regs_as_uses(collector); - collector.add_use(src2.to_reg()); match *op { - Avx512Opcode::Vpermi2b => collector.add_mod(dst.to_writable_reg()), - _ => collector.add_def(dst.to_writable_reg()), + Avx512Opcode::Vpermi2b => collector.reg_mod(dst.to_writable_reg()), + _ => collector.reg_def(dst.to_writable_reg()), } + collector.reg_use(src2.to_reg()); + src1.get_operands(collector); } Inst::XmmRmRImm { op, @@ -1919,63 +1811,56 @@ fn x64_get_regs(inst: &Inst, collector: &mut RegUsageCollector) { dst, .. } => { - debug_assert_eq!(*src1, dst.to_reg()); if inst.produces_const() { - // No need to account for src2, since src2 == dst. 
- debug_assert_eq!(src2.to_reg(), Some(dst.to_reg())); - collector.add_def(*dst); - } else if *op == SseOpcode::Pextrb - || *op == SseOpcode::Pextrw - || *op == SseOpcode::Pextrd - || *op == SseOpcode::Pshufd - || *op == SseOpcode::Roundss - || *op == SseOpcode::Roundsd - || *op == SseOpcode::Roundps - || *op == SseOpcode::Roundpd - { - src2.get_regs_as_uses(collector); - collector.add_def(*dst); + collector.reg_def(*dst); + } else if !op.uses_src1() { + // FIXME: split this instruction into two, so we don't + // need this awkward src1-is-only-sometimes-an-arg + // behavior. + collector.reg_def(*dst); + src2.get_operands(collector); } else { - src2.get_regs_as_uses(collector); - collector.add_mod(*dst); + collector.reg_use(*src1); + collector.reg_reuse_def(*dst, 0); + src2.get_operands(collector); } } - Inst::XmmUninitializedValue { dst } => collector.add_def(dst.to_writable_reg()), - Inst::XmmLoadConst { dst, .. } => collector.add_def(*dst), + Inst::XmmUninitializedValue { dst } => collector.reg_def(dst.to_writable_reg()), + Inst::XmmLoadConst { dst, .. } => collector.reg_def(*dst), Inst::XmmMinMaxSeq { lhs, rhs, dst, .. } => { - debug_assert_eq!(*rhs, dst.to_reg()); - collector.add_use(lhs.to_reg()); - collector.add_mod(dst.to_writable_reg()); + collector.reg_use(rhs.to_reg()); + collector.reg_use(lhs.to_reg()); + collector.reg_reuse_def(dst.to_writable_reg(), 0); // Reuse RHS. } Inst::XmmRmiReg { src1, src2, dst, .. } => { - debug_assert_eq!(*src1, dst.to_reg()); - src2.clone().to_reg_mem_imm().get_regs_as_uses(collector); - collector.add_mod(dst.to_writable_reg()); + collector.reg_use(src1.to_reg()); + collector.reg_reuse_def(dst.to_writable_reg(), 0); // Reuse RHS. + src2.get_operands(collector); } Inst::XmmMovRM { src, dst, .. } => { - collector.add_use(*src); - dst.get_regs_as_uses(collector); + collector.reg_use(*src); + dst.get_operands(collector); } Inst::XmmCmpRmR { src, dst, .. } => { - src.get_regs_as_uses(collector); - collector.add_use(dst.to_reg()); + collector.reg_use(dst.to_reg()); + src.get_operands(collector); } Inst::Imm { dst, .. } => { - collector.add_def(dst.to_writable_reg()); + collector.reg_def(dst.to_writable_reg()); } Inst::MovRR { src, dst, .. } => { - collector.add_use(src.to_reg()); - collector.add_def(dst.to_writable_reg()); + collector.reg_use(src.to_reg()); + collector.reg_def(dst.to_writable_reg()); } Inst::XmmToGpr { src, dst, .. } => { - collector.add_use(src.to_reg()); - collector.add_def(dst.to_writable_reg()); + collector.reg_use(src.to_reg()); + collector.reg_def(dst.to_writable_reg()); } Inst::GprToXmm { src, dst, .. } => { - src.get_regs_as_uses(collector); - collector.add_def(dst.to_writable_reg()); + collector.reg_def(dst.to_writable_reg()); + src.get_operands(collector); } Inst::CvtUint64ToFloatSeq { src, @@ -1984,10 +1869,10 @@ fn x64_get_regs(inst: &Inst, collector: &mut RegUsageCollector) { tmp_gpr2, .. } => { - collector.add_mod(src.to_writable_reg()); - collector.add_def(dst.to_writable_reg()); - collector.add_def(tmp_gpr1.to_writable_reg()); - collector.add_def(tmp_gpr2.to_writable_reg()); + collector.reg_mod(src.to_writable_reg()); + collector.reg_def(dst.to_writable_reg()); + collector.reg_early_def(tmp_gpr1.to_writable_reg()); + collector.reg_early_def(tmp_gpr2.to_writable_reg()); } Inst::CvtFloatToSintSeq { src, @@ -2003,74 +1888,86 @@ fn x64_get_regs(inst: &Inst, collector: &mut RegUsageCollector) { tmp_xmm, .. 
} => { - collector.add_mod(src.to_writable_reg()); - collector.add_def(dst.to_writable_reg()); - collector.add_def(tmp_gpr.to_writable_reg()); - collector.add_def(tmp_xmm.to_writable_reg()); + collector.reg_mod(src.to_writable_reg()); + collector.reg_def(dst.to_writable_reg()); + collector.reg_early_def(tmp_gpr.to_writable_reg()); + collector.reg_early_def(tmp_xmm.to_writable_reg()); } Inst::MovzxRmR { src, dst, .. } => { - src.get_regs_as_uses(collector); - collector.add_def(dst.to_writable_reg()); + collector.reg_def(dst.to_writable_reg()); + src.get_operands(collector); } Inst::Mov64MR { src, dst, .. } => { - src.get_regs_as_uses(collector); - collector.add_def(dst.to_writable_reg()) + collector.reg_def(dst.to_writable_reg()); + src.get_operands(collector); } Inst::LoadEffectiveAddress { addr: src, dst } => { - src.get_regs_as_uses(collector); - collector.add_def(dst.to_writable_reg()) + collector.reg_def(dst.to_writable_reg()); + src.get_operands(collector); } Inst::MovsxRmR { src, dst, .. } => { - src.get_regs_as_uses(collector); - collector.add_def(dst.to_writable_reg()); + collector.reg_def(dst.to_writable_reg()); + src.get_operands(collector); } Inst::MovRM { src, dst, .. } => { - collector.add_use(src.to_reg()); - dst.get_regs_as_uses(collector); + collector.reg_use(src.to_reg()); + dst.get_operands(collector); } - Inst::ShiftR { num_bits, dst, .. } => { + Inst::ShiftR { + num_bits, src, dst, .. + } => { + collector.reg_use(src.to_reg()); + collector.reg_reuse_def(dst.to_writable_reg(), 0); if let Imm8Reg::Reg { reg } = num_bits.clone().to_imm8_reg() { - debug_assert_eq!(reg, regs::rcx()); - collector.add_use(regs::rcx()); + collector.reg_fixed_use(reg, regs::rcx()); } - collector.add_mod(dst.to_writable_reg()); } Inst::CmpRmiR { src, dst, .. } => { - src.get_regs_as_uses(collector); - collector.add_use(dst.to_reg()); // yes, really `add_use` + // N.B.: use, not def (cmp doesn't write its result). + collector.reg_use(dst.to_reg()); + src.get_operands(collector); } Inst::Setcc { dst, .. } => { - collector.add_def(dst.to_writable_reg()); + collector.reg_def(dst.to_writable_reg()); } Inst::Cmove { - consequent: src, + consequent, + alternative, dst, .. } => { - src.get_regs_as_uses(collector); - collector.add_mod(dst.to_writable_reg()); + collector.reg_use(alternative.to_reg()); + collector.reg_reuse_def(dst.to_writable_reg(), 0); + consequent.get_operands(collector); } Inst::XmmCmove { - consequent: src, + consequent, + alternative, dst, .. } => { - src.get_regs_as_uses(collector); - collector.add_mod(dst.to_writable_reg()); + collector.reg_use(alternative.to_reg()); + collector.reg_reuse_def(dst.to_writable_reg(), 0); + consequent.get_operands(collector); } Inst::Push64 { src } => { - src.get_regs_as_uses(collector); - collector.add_mod(Writable::from_reg(regs::rsp())); + src.get_operands(collector); } Inst::Pop64 { dst } => { - collector.add_def(dst.to_writable_reg()); + collector.reg_def(dst.to_writable_reg()); } Inst::CallKnown { ref uses, ref defs, .. } => { - collector.add_uses(uses); - collector.add_defs(defs); + for &u in uses { + collector.reg_use(u); + } + for &d in defs { + collector.reg_def(d); + } + // FIXME: keep clobbers separate in the Inst and use + // `reg_clobber()`. } Inst::CallUnknown { @@ -2079,9 +1976,15 @@ fn x64_get_regs(inst: &Inst, collector: &mut RegUsageCollector) { dest, .. 
} => { - collector.add_uses(uses); - collector.add_defs(defs); - dest.get_regs_as_uses(collector); + dest.get_operands(collector); + for &u in uses { + collector.reg_use(u); + } + for &d in defs { + collector.reg_def(d); + } + // FIXME: keep clobbers separate in the Inst and use + // `reg_clobber()`. } Inst::JmpTableSeq { @@ -2090,17 +1993,17 @@ fn x64_get_regs(inst: &Inst, collector: &mut RegUsageCollector) { ref tmp2, .. } => { - collector.add_use(*idx); - collector.add_def(*tmp1); - collector.add_def(*tmp2); + collector.reg_use(*idx); + collector.reg_early_def(*tmp1); + collector.reg_early_def(*tmp2); } Inst::JmpUnknown { target } => { - target.get_regs_as_uses(collector); + target.get_operands(collector); } Inst::LoadExtName { dst, .. } => { - collector.add_def(*dst); + collector.reg_def(*dst); } Inst::LockCmpxchg { @@ -2110,23 +2013,30 @@ fn x64_get_regs(inst: &Inst, collector: &mut RegUsageCollector) { dst_old, .. } => { - mem.get_regs_as_uses(collector); - collector.add_use(*replacement); - - debug_assert_eq!(*expected, regs::rax()); - debug_assert_eq!(dst_old.to_reg(), regs::rax()); - collector.add_mod(Writable::from_reg(regs::rax())); + collector.reg_use(*replacement); + collector.reg_fixed_use(*expected, regs::rax()); + collector.reg_fixed_def(*dst_old, regs::rax()); + mem.get_operands(collector); } Inst::AtomicRmwSeq { .. } => { - collector.add_use(regs::r9()); - collector.add_use(regs::r10()); - collector.add_def(Writable::from_reg(regs::r11())); - collector.add_def(Writable::from_reg(regs::rax())); + // FIXME: take vreg args, not fixed regs, and just use + // reg_fixed_use here. + collector.reg_use(regs::r9()); + collector.reg_use(regs::r10()); + collector.reg_def(Writable::from_reg(regs::r11())); + collector.reg_def(Writable::from_reg(regs::rax())); } - Inst::Ret - | Inst::EpiloguePlaceholder + Inst::Ret { rets } => { + // The return value(s) are live-out; we represent this + // with register uses on the return instruction. + for &ret in rets { + collector.reg_use(ret); + } + } + + Inst::EpiloguePlaceholder | Inst::JmpKnown { .. } | Inst::JmpIf { .. } | Inst::JmpCond { .. } @@ -2147,457 +2057,15 @@ fn x64_get_regs(inst: &Inst, collector: &mut RegUsageCollector) { // ELF systems; other x86-64 targets with other conventions (i.e., // Windows) use different TLS strategies. for reg in X64ABIMachineSpec::get_regs_clobbered_by_call(CallConv::SystemV) { - collector.add_def(reg); + // FIXME: use actual clobber functionality. + collector.reg_def(reg); } } - Inst::ValueLabelMarker { reg, .. } => { - collector.add_use(*reg); - } - Inst::Unwind { .. } => {} - } -} -//============================================================================= -// Instructions and subcomponents: map_regs - -impl Amode { - fn map_uses(&mut self, map: &RM) { - match self { - Amode::ImmReg { ref mut base, .. } => map.map_use(base), - Amode::ImmRegRegShift { - ref mut base, - ref mut index, - .. - } => { - base.map_use(map); - index.map_use(map); - } - Amode::RipRelative { .. } => { - // RIP isn't involved in regalloc. - } - } - } - - /// Offset the amode by a fixed offset. - pub(crate) fn offset(&self, offset: u32) -> Self { - let mut ret = self.clone(); - match &mut ret { - &mut Amode::ImmReg { ref mut simm32, .. } => *simm32 += offset, - &mut Amode::ImmRegRegShift { ref mut simm32, .. 
} => *simm32 += offset, - _ => panic!("Cannot offset amode: {:?}", self), - } - ret - } -} - -impl RegMemImm { - fn map_uses(&mut self, map: &RM) { - match self { - RegMemImm::Reg { ref mut reg } => map.map_use(reg), - RegMemImm::Mem { ref mut addr } => addr.map_uses(map), - RegMemImm::Imm { .. } => {} - } - } - - fn map_as_def(&mut self, mapper: &RM) { - match self { - Self::Reg { reg } => { - let mut writable_src = Writable::from_reg(*reg); - mapper.map_def(&mut writable_src); - *self = Self::reg(writable_src.to_reg()); - } - _ => panic!("unexpected RegMemImm kind in map_src_reg_as_def"), - } - } -} - -impl RegMem { - fn map_uses(&mut self, map: &RM) { - match self { - RegMem::Reg { ref mut reg } => map.map_use(reg), - RegMem::Mem { ref mut addr, .. } => addr.map_uses(map), - } - } - - fn map_as_def(&mut self, mapper: &RM) { - match self { - Self::Reg { reg } => { - let mut writable_src = Writable::from_reg(*reg); - mapper.map_def(&mut writable_src); - *self = Self::reg(writable_src.to_reg()); - } - _ => panic!("unexpected RegMem kind in map_src_reg_as_def"), - } - } -} - -pub(crate) fn x64_map_regs(inst: &mut Inst, mapper: &RM) { - // Note this must be carefully synchronized with x64_get_regs. - let produces_const = inst.produces_const(); - - match inst { - // ** Nop - Inst::AluRmiR { - ref mut src1, - ref mut src2, - ref mut dst, - .. - } => { - debug_assert_eq!(*src1, dst.to_reg()); - if produces_const { - src2.map_as_def(mapper); - dst.map_def(mapper); - *src1 = dst.to_reg(); - } else { - src2.map_uses(mapper); - dst.map_mod(mapper); - *src1 = dst.to_reg(); - } - } - Inst::Not { src, dst, .. } | Inst::Neg { src, dst, .. } => { - debug_assert_eq!(*src, dst.to_reg()); - dst.map_mod(mapper); - *src = dst.to_reg(); - } - Inst::Div { divisor, .. } => divisor.map_uses(mapper), - Inst::MulHi { src2, .. } => src2.map_uses(mapper), - Inst::CheckedDivOrRemSeq { divisor, tmp, .. } => { - divisor.map_mod(mapper); - if let Some(tmp) = tmp { - tmp.map_def(mapper) - } - } - Inst::SignExtendData { .. } => {} - Inst::XmmUnaryRmR { - ref mut src, - ref mut dst, - .. - } - | Inst::XmmUnaryRmREvex { - ref mut src, - ref mut dst, - .. - } => { - src.map_uses(mapper); - dst.map_def(mapper); - } - Inst::UnaryRmR { - ref mut src, - ref mut dst, - .. - } => { - src.map_uses(mapper); - dst.map_def(mapper); - } - Inst::XmmRmRImm { - ref op, - ref mut src1, - ref mut src2, - ref mut dst, - .. - } => { - debug_assert_eq!(*src1, dst.to_reg()); - if produces_const { - src2.map_as_def(mapper); - mapper.map_def(dst); - *src1 = dst.to_reg(); - } else if *op == SseOpcode::Pextrb - || *op == SseOpcode::Pextrw - || *op == SseOpcode::Pextrd - || *op == SseOpcode::Pshufd - || *op == SseOpcode::Roundss - || *op == SseOpcode::Roundsd - || *op == SseOpcode::Roundps - || *op == SseOpcode::Roundpd - { - src2.map_uses(mapper); - mapper.map_def(dst); - *src1 = dst.to_reg(); - } else { - src2.map_uses(mapper); - mapper.map_mod(dst); - *src1 = dst.to_reg(); - } - } - Inst::XmmRmR { - ref mut src1, - ref mut src2, - ref mut dst, - .. - } => { - debug_assert_eq!(*src1, dst.to_reg()); - if produces_const { - src2.map_as_def(mapper); - dst.map_def(mapper); - *src1 = dst.to_reg(); - } else { - src2.map_uses(mapper); - dst.map_mod(mapper); - *src1 = dst.to_reg(); - } - } - Inst::XmmRmREvex { - op, - ref mut src1, - ref mut src2, - ref mut dst, - .. 
- } => { - src1.map_uses(mapper); - src2.map_use(mapper); - match *op { - Avx512Opcode::Vpermi2b => dst.map_mod(mapper), - _ => dst.map_def(mapper), - } - } - Inst::XmmRmiReg { - ref mut src1, - ref mut src2, - ref mut dst, - .. - } => { - debug_assert_eq!(*src1, dst.to_reg()); - src2.map_uses(mapper); - dst.map_mod(mapper); - *src1 = dst.to_reg(); - } - Inst::XmmUninitializedValue { ref mut dst, .. } => { - dst.map_def(mapper); - } - Inst::XmmLoadConst { ref mut dst, .. } => { - mapper.map_def(dst); - } - Inst::XmmMinMaxSeq { - ref mut lhs, - ref mut rhs, - ref mut dst, - .. - } => { - debug_assert_eq!(*rhs, dst.to_reg()); - lhs.map_use(mapper); - dst.map_mod(mapper); - *rhs = dst.to_reg(); - } - Inst::XmmMovRM { - ref mut src, - ref mut dst, - .. - } => { - mapper.map_use(src); - dst.map_uses(mapper); - } - Inst::XmmCmpRmR { - ref mut src, - ref mut dst, - .. - } => { - src.map_uses(mapper); - dst.map_use(mapper); - } - Inst::Imm { ref mut dst, .. } => dst.map_def(mapper), - Inst::MovRR { - ref mut src, - ref mut dst, - .. - } => { - src.map_use(mapper); - dst.map_def(mapper); - } - Inst::XmmToGpr { - ref mut src, - ref mut dst, - .. - } => { - src.map_use(mapper); - dst.map_def(mapper); - } - Inst::GprToXmm { - ref mut src, - ref mut dst, - .. - } => { - src.map_uses(mapper); - dst.map_def(mapper); - } - Inst::CvtUint64ToFloatSeq { - ref mut src, - ref mut dst, - ref mut tmp_gpr1, - ref mut tmp_gpr2, - .. - } => { - src.map_mod(mapper); - dst.map_def(mapper); - tmp_gpr1.map_def(mapper); - tmp_gpr2.map_def(mapper); - } - Inst::CvtFloatToSintSeq { - ref mut src, - ref mut dst, - ref mut tmp_xmm, - ref mut tmp_gpr, - .. - } - | Inst::CvtFloatToUintSeq { - ref mut src, - ref mut dst, - ref mut tmp_gpr, - ref mut tmp_xmm, - .. - } => { - src.map_mod(mapper); - dst.map_def(mapper); - tmp_gpr.map_def(mapper); - tmp_xmm.map_def(mapper); - } - Inst::MovzxRmR { - ref mut src, - ref mut dst, - .. - } => { - src.map_uses(mapper); - dst.map_def(mapper); - } - Inst::Mov64MR { src, dst, .. } => { - src.map_uses(mapper); - dst.map_def(mapper); - } - Inst::LoadEffectiveAddress { addr: src, dst } => { - src.map_uses(mapper); - dst.map_def(mapper); - } - Inst::MovsxRmR { - ref mut src, - ref mut dst, - .. - } => { - src.map_uses(mapper); - dst.map_def(mapper); - } - Inst::MovRM { - ref mut src, - ref mut dst, - .. - } => { - src.map_use(mapper); - dst.map_uses(mapper); - } - Inst::ShiftR { - ref mut src, - ref mut dst, - .. - } => { - debug_assert_eq!(*src, dst.to_reg()); - dst.map_mod(mapper); - *src = dst.to_reg(); - } - Inst::CmpRmiR { - ref mut src, - ref mut dst, - .. - } => { - src.map_uses(mapper); - dst.map_use(mapper); - } - Inst::Setcc { ref mut dst, .. } => dst.map_def(mapper), - Inst::Cmove { - consequent: ref mut src, - ref mut dst, - ref mut alternative, - .. - } => { - src.map_uses(mapper); - dst.map_mod(mapper); - *alternative = dst.to_reg(); - } - Inst::XmmCmove { - consequent: ref mut src, - ref mut dst, - ref mut alternative, - .. - } => { - src.map_uses(mapper); - dst.map_mod(mapper); - *alternative = dst.to_reg(); - } - Inst::Push64 { ref mut src } => src.map_uses(mapper), - Inst::Pop64 { ref mut dst } => { - dst.map_def(mapper); - } - - Inst::CallKnown { - ref mut uses, - ref mut defs, - .. - } => { - for r in uses.iter_mut() { - mapper.map_use(r); - } - for r in defs.iter_mut() { - mapper.map_def(r); - } - } - - Inst::CallUnknown { - ref mut uses, - ref mut defs, - ref mut dest, - .. 
-        } => {
-            for r in uses.iter_mut() {
-                mapper.map_use(r);
-            }
-            for r in defs.iter_mut() {
-                mapper.map_def(r);
-            }
-            dest.map_uses(mapper);
-        }
-
-        Inst::JmpTableSeq {
-            ref mut idx,
-            ref mut tmp1,
-            ref mut tmp2,
-            ..
-        } => {
-            mapper.map_use(idx);
-            mapper.map_def(tmp1);
-            mapper.map_def(tmp2);
-        }
-
-        Inst::JmpUnknown { ref mut target } => target.map_uses(mapper),
-
-        Inst::LoadExtName { ref mut dst, .. } => mapper.map_def(dst),
-
-        Inst::LockCmpxchg {
-            ref mut replacement,
-            ref mut mem,
-            ..
-        } => {
-            mapper.map_use(replacement);
-            mem.map_uses(mapper);
-        }
-
-        Inst::ValueLabelMarker { ref mut reg, .. } => mapper.map_use(reg),
-
-        Inst::Ret
-        | Inst::EpiloguePlaceholder
-        | Inst::JmpKnown { .. }
-        | Inst::JmpCond { .. }
-        | Inst::JmpIf { .. }
-        | Inst::Nop { .. }
-        | Inst::TrapIf { .. }
-        | Inst::VirtualSPOffsetAdj { .. }
-        | Inst::Ud2 { .. }
-        | Inst::Hlt
-        | Inst::AtomicRmwSeq { .. }
-        | Inst::ElfTlsGetAddr { .. }
-        | Inst::MachOTlsGetAddr { .. }
-        | Inst::Fence { .. }
-        | Inst::Unwind { .. } => {
-            // Instruction doesn't explicitly mention any regs, so it can't have any virtual
-            // regs that we'd need to remap. Hence no action required.
+        Inst::DummyUse { reg } => {
+            collector.reg_use(*reg);
         }
     }
 }
@@ -2606,15 +2074,8 @@ pub(crate) fn x64_map_regs(inst: &mut Inst, mapper: &RM) {
 // Instructions: misc functions and external interface

 impl MachInst for Inst {
-    fn get_regs(&self, collector: &mut RegUsageCollector) {
-        x64_get_regs(&self, collector)
-    }
-
-    fn map_regs<RUM>(&mut self, mapper: &RUM)
-    where
-        RUM: regalloc::RegUsageMapper,
-    {
-        x64_map_regs(self, mapper);
+    fn get_operands<F: Fn(VReg) -> VReg>(&self, collector: &mut OperandCollector<'_, F>) {
+        x64_get_operands(&self, collector)
     }

     fn is_move(&self) -> Option<(Writable<Reg>, Reg)> {
@@ -2661,7 +2122,7 @@ impl MachInst for Inst {
     fn is_term<'a>(&'a self) -> MachTerminator<'a> {
         match self {
             // Interesting cases.
-            &Self::Ret | &Self::EpiloguePlaceholder => MachTerminator::Ret,
+            &Self::Ret { .. } | &Self::EpiloguePlaceholder => MachTerminator::Ret,
             &Self::JmpKnown { dst } => MachTerminator::Uncond(dst),
             &Self::JmpCond {
                 taken, not_taken, ..
@@ -2675,28 +2136,6 @@ impl MachInst for Inst {
         }
     }

-    fn stack_op_info(&self) -> Option<MachInstStackOpInfo> {
-        match self {
-            Self::VirtualSPOffsetAdj { offset } => Some(MachInstStackOpInfo::NomSPAdj(*offset)),
-            Self::MovRM {
-                size: OperandSize::Size8,
-                src,
-                dst: SyntheticAmode::NominalSPOffset { simm32 },
-            } => Some(MachInstStackOpInfo::StoreNomSPOff(
-                src.to_reg(),
-                *simm32 as i64,
-            )),
-            Self::Mov64MR {
-                src: SyntheticAmode::NominalSPOffset { simm32 },
-                dst,
-            } => Some(MachInstStackOpInfo::LoadNomSPOff(
-                dst.to_reg().to_reg(),
-                *simm32 as i64,
-            )),
-            _ => None,
-        }
-    }
-
     fn gen_move(dst_reg: Writable<Reg>, src_reg: Reg, ty: Type) -> Inst {
         log::trace!(
             "Inst::gen_move {:?} -> {:?} (type: {:?})",
@@ -2704,13 +2143,13 @@ impl MachInst for Inst {
             dst_reg.to_reg(),
             ty
         );
-        let rc_dst = dst_reg.to_reg().get_class();
-        let rc_src = src_reg.get_class();
+        let rc_dst = dst_reg.to_reg().class();
+        let rc_src = src_reg.class();
         // If this isn't true, we have gone way off the rails.
         debug_assert!(rc_dst == rc_src);
         match rc_dst {
-            RegClass::I64 => Inst::mov_r_r(OperandSize::Size64, src_reg, dst_reg),
-            RegClass::V128 => {
+            RegClass::Int => Inst::mov_r_r(OperandSize::Size64, src_reg, dst_reg),
+            RegClass::Float => {
                 // The Intel optimization manual, in "3.5.1.13 Zero-Latency MOV Instructions",
                 // doesn't include MOVSS/MOVSD as instructions with zero-latency. Use movaps for
                 // those, which may write more lanes that we need, but are specified to have
@@ -2723,7 +2162,6 @@ impl MachInst for Inst {
                 };
                 Inst::xmm_unary_rm_r(opcode, RegMem::reg(src_reg), dst_reg)
             }
-            _ => panic!("gen_move(x64): unhandled regclass {:?}", rc_dst),
         }
     }
@@ -2731,32 +2169,28 @@ impl MachInst for Inst {
         Inst::nop(std::cmp::min(preferred_size, 15) as u8)
     }

-    fn maybe_direct_reload(&self, _reg: VirtualReg, _slot: SpillSlot) -> Option<Inst> {
-        None
-    }
-
     fn rc_for_type(ty: Type) -> CodegenResult<(&'static [RegClass], &'static [Type])> {
         match ty {
-            types::I8 => Ok((&[RegClass::I64], &[types::I8])),
-            types::I16 => Ok((&[RegClass::I64], &[types::I16])),
-            types::I32 => Ok((&[RegClass::I64], &[types::I32])),
-            types::I64 => Ok((&[RegClass::I64], &[types::I64])),
-            types::B1 => Ok((&[RegClass::I64], &[types::B1])),
-            types::B8 => Ok((&[RegClass::I64], &[types::B8])),
-            types::B16 => Ok((&[RegClass::I64], &[types::B16])),
-            types::B32 => Ok((&[RegClass::I64], &[types::B32])),
-            types::B64 => Ok((&[RegClass::I64], &[types::B64])),
+            types::I8 => Ok((&[RegClass::Int], &[types::I8])),
+            types::I16 => Ok((&[RegClass::Int], &[types::I16])),
+            types::I32 => Ok((&[RegClass::Int], &[types::I32])),
+            types::I64 => Ok((&[RegClass::Int], &[types::I64])),
+            types::B1 => Ok((&[RegClass::Int], &[types::B1])),
+            types::B8 => Ok((&[RegClass::Int], &[types::B8])),
+            types::B16 => Ok((&[RegClass::Int], &[types::B16])),
+            types::B32 => Ok((&[RegClass::Int], &[types::B32])),
+            types::B64 => Ok((&[RegClass::Int], &[types::B64])),
             types::R32 => panic!("32-bit reftype pointer should never be seen on x86-64"),
-            types::R64 => Ok((&[RegClass::I64], &[types::R64])),
-            types::F32 => Ok((&[RegClass::V128], &[types::F32])),
-            types::F64 => Ok((&[RegClass::V128], &[types::F64])),
-            types::I128 => Ok((&[RegClass::I64, RegClass::I64], &[types::I64, types::I64])),
-            types::B128 => Ok((&[RegClass::I64, RegClass::I64], &[types::B64, types::B64])),
+            types::R64 => Ok((&[RegClass::Int], &[types::R64])),
+            types::F32 => Ok((&[RegClass::Float], &[types::F32])),
+            types::F64 => Ok((&[RegClass::Float], &[types::F64])),
+            types::I128 => Ok((&[RegClass::Int, RegClass::Int], &[types::I64, types::I64])),
+            types::B128 => Ok((&[RegClass::Int, RegClass::Int], &[types::B64, types::B64])),
             _ if ty.is_vector() => {
                 assert!(ty.bits() <= 128);
-                Ok((&[RegClass::V128], &[types::I8X16]))
+                Ok((&[RegClass::Float], &[types::I8X16]))
             }
-            types::IFLAGS | types::FFLAGS => Ok((&[RegClass::I64], &[types::I64])),
+            types::IFLAGS | types::FFLAGS => Ok((&[RegClass::Int], &[types::I64])),
             _ => Err(CodegenError::Unsupported(format!(
                 "Unexpected SSA-value type: {}",
                 ty
@@ -2764,6 +2198,13 @@
         }
     }

+    fn canonical_type_for_rc(rc: RegClass) -> Type {
+        match rc {
+            RegClass::Float => types::I8X16,
+            RegClass::Int => types::I64,
+        }
+    }
+
     fn gen_jump(label: MachLabel) -> Inst {
         Inst::jmp_known(label)
     }
@@ -2877,22 +2318,25 @@ impl MachInst for Inst {
         ret
     }

+    fn gen_dummy_use(reg: Reg) -> Self {
+        Inst::DummyUse { reg }
+    }
+
     fn worst_case_size() -> CodeOffset {
         15
     }

     fn ref_type_regclass(_: &settings::Flags) -> RegClass {
-        RegClass::I64
+        RegClass::Int
     }

-    fn gen_value_label_marker(label: ValueLabel, reg: Reg) -> Self {
-        Inst::ValueLabelMarker { label, reg }
-    }
-
-    fn defines_value_label(&self) -> Option<(ValueLabel, Reg)> {
+    fn is_safepoint(&self) -> bool {
         match self {
-            Inst::ValueLabelMarker { label, reg } => Some((*label, *reg)),
-            _ => None,
+            Inst::CallKnown { .. }
+            | Inst::CallUnknown { .. }
+            | Inst::TrapIf { .. }
+            | Inst::Ud2 { .. } => true,
+            _ => false,
         }
     }

@@ -2929,12 +2373,19 @@ impl MachInstEmit for Inst {
     type State = EmitState;
     type Info = EmitInfo;

-    fn emit(&self, sink: &mut MachBuffer<Inst>, info: &Self::Info, state: &mut Self::State) {
-        emit::emit(self, sink, info, state);
+    fn emit(
+        &self,
+        allocs: &[Allocation],
+        sink: &mut MachBuffer<Inst>,
+        info: &Self::Info,
+        state: &mut Self::State,
+    ) {
+        let mut allocs = AllocationConsumer::new(allocs);
+        emit::emit(self, &mut allocs, sink, info, state);
     }

-    fn pretty_print(&self, mb_rru: Option<&RealRegUniverse>, _: &mut Self::State) -> String {
-        self.show_rru(mb_rru)
+    fn pretty_print_inst(&self, allocs: &[Allocation], _: &mut Self::State) -> String {
+        PrettyPrint::pretty_print(self, 0, &mut AllocationConsumer::new(allocs))
     }
 }
diff --git a/cranelift/codegen/src/isa/x64/inst/regs.rs b/cranelift/codegen/src/isa/x64/inst/regs.rs
index ddcf3adc2d..d720951f57 100644
--- a/cranelift/codegen/src/isa/x64/inst/regs.rs
+++ b/cranelift/codegen/src/isa/x64/inst/regs.rs
@@ -1,26 +1,14 @@
-//! Registers, the Universe thereof, and printing.
+//! Register definitions for regalloc2.
 //!
-//! These are ordered by sequence number, as required in the Universe.
+//! We define 16 GPRs, with indices equal to the hardware encoding,
+//! and 16 XMM registers.
 //!
-//! The caller-saved registers are placed first in order to prefer not to clobber (requiring
-//! saves/restores in prologue/epilogue code) when possible. Note that there is no other heuristic
-//! in the backend that will apply such pressure; the register allocator's cost heuristics are not
-//! aware of the cost of clobber-save/restore code.
-//!
-//! One might worry that this pessimizes code with many callsites, where using caller-saves causes
-//! us to have to save them (as we are the caller) frequently. However, the register allocator
-//! *should be* aware of *this* cost, because it sees that the call instruction modifies all of the
-//! caller-saved (i.e., callee-clobbered) registers.
-//!
-//! Hence, this ordering encodes pressure in one direction (prefer not to clobber registers that we
-//! ourselves have to save) and this is balanaced against the RA's pressure in the other direction
-//! at callsites.
+//! Note also that we make use of pinned VRegs to refer to PRegs.

+use crate::machinst::{AllocationConsumer, RealReg, Reg};
 use crate::settings;
-use alloc::vec::Vec;
-use regalloc::{
-    PrettyPrint, RealReg, RealRegUniverse, Reg, RegClass, RegClassInfo, NUM_REG_CLASSES,
-};
+use alloc::string::ToString;
+use regalloc2::{MachineEnv, PReg, RegClass, VReg};
 use std::string::String;

 // Hardware encodings (note the special rax, rcx, rdx, rbx order).
@@ -42,53 +30,62 @@
 pub const ENC_R13: u8 = 13;
 pub const ENC_R14: u8 = 14;
 pub const ENC_R15: u8 = 15;

-fn gpr(enc: u8, index: u8) -> Reg {
-    Reg::new_real(RegClass::I64, enc, index)
+// Constructors for Regs.
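Aside on the pinned-vreg convention noted in the module comment above — a minimal sketch for orientation, not part of the patch itself. A "real" register is simply a VReg whose index aliases the corresponding PReg, so no separate register universe is needed. This mirrors the `gpr()`/`fpr()` constructors that follow, using only regalloc2 calls that already appear in this diff:

    use regalloc2::{PReg, RegClass, VReg};

    // %rax is hardware encoding 0 in the integer class.
    let rax = PReg::new(0, RegClass::Int);
    // The pinned vreg referring to it, exactly as `gpr(ENC_RAX)` builds one.
    let pinned = VReg::new(rax.index(), RegClass::Int);
    assert_eq!(pinned.vreg(), rax.index()); // the vreg index aliases the preg index
    assert_eq!(pinned.class(), RegClass::Int);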
+ +fn gpr(enc: u8) -> Reg { + let preg = PReg::new(enc as usize, RegClass::Int); + Reg::from(VReg::new(preg.index(), RegClass::Int)) } pub(crate) fn rsi() -> Reg { - gpr(ENC_RSI, 16) + gpr(ENC_RSI) } pub(crate) fn rdi() -> Reg { - gpr(ENC_RDI, 17) + gpr(ENC_RDI) } pub(crate) fn rax() -> Reg { - gpr(ENC_RAX, 18) + gpr(ENC_RAX) } pub(crate) fn rcx() -> Reg { - gpr(ENC_RCX, 19) + gpr(ENC_RCX) } pub(crate) fn rdx() -> Reg { - gpr(ENC_RDX, 20) + gpr(ENC_RDX) } pub(crate) fn r8() -> Reg { - gpr(ENC_R8, 21) + gpr(ENC_R8) } pub(crate) fn r9() -> Reg { - gpr(ENC_R9, 22) + gpr(ENC_R9) } pub(crate) fn r10() -> Reg { - gpr(ENC_R10, 23) + gpr(ENC_R10) } pub(crate) fn r11() -> Reg { - gpr(ENC_R11, 24) + gpr(ENC_R11) } pub(crate) fn r12() -> Reg { - gpr(ENC_R12, 25) + gpr(ENC_R12) } pub(crate) fn r13() -> Reg { - gpr(ENC_R13, 26) + gpr(ENC_R13) } pub(crate) fn r14() -> Reg { - gpr(ENC_R14, 27) + gpr(ENC_R14) } pub(crate) fn rbx() -> Reg { - gpr(ENC_RBX, 28) + gpr(ENC_RBX) } pub(crate) fn r15() -> Reg { - // r15 is put aside since this is the pinned register. - gpr(ENC_R15, 29) + gpr(ENC_R15) +} + +pub(crate) fn rsp() -> Reg { + gpr(ENC_RSP) +} +pub(crate) fn rbp() -> Reg { + gpr(ENC_RBP) } /// The pinned register on this architecture. @@ -98,163 +95,177 @@ pub(crate) fn pinned_reg() -> Reg { r15() } -fn fpr(enc: u8, index: u8) -> Reg { - Reg::new_real(RegClass::V128, enc, index) +fn fpr(enc: u8) -> Reg { + let preg = PReg::new(enc as usize, RegClass::Float); + Reg::from(VReg::new(preg.index(), RegClass::Float)) } pub(crate) fn xmm0() -> Reg { - fpr(0, 0) + fpr(0) } pub(crate) fn xmm1() -> Reg { - fpr(1, 1) + fpr(1) } pub(crate) fn xmm2() -> Reg { - fpr(2, 2) + fpr(2) } pub(crate) fn xmm3() -> Reg { - fpr(3, 3) + fpr(3) } pub(crate) fn xmm4() -> Reg { - fpr(4, 4) + fpr(4) } pub(crate) fn xmm5() -> Reg { - fpr(5, 5) + fpr(5) } pub(crate) fn xmm6() -> Reg { - fpr(6, 6) + fpr(6) } pub(crate) fn xmm7() -> Reg { - fpr(7, 7) + fpr(7) } pub(crate) fn xmm8() -> Reg { - fpr(8, 8) + fpr(8) } pub(crate) fn xmm9() -> Reg { - fpr(9, 9) + fpr(9) } pub(crate) fn xmm10() -> Reg { - fpr(10, 10) + fpr(10) } pub(crate) fn xmm11() -> Reg { - fpr(11, 11) + fpr(11) } pub(crate) fn xmm12() -> Reg { - fpr(12, 12) + fpr(12) } pub(crate) fn xmm13() -> Reg { - fpr(13, 13) + fpr(13) } pub(crate) fn xmm14() -> Reg { - fpr(14, 14) + fpr(14) } pub(crate) fn xmm15() -> Reg { - fpr(15, 15) + fpr(15) } -pub(crate) fn rsp() -> Reg { - gpr(ENC_RSP, 30) -} -pub(crate) fn rbp() -> Reg { - gpr(ENC_RBP, 31) -} - -/// Create the register universe for X64. -/// -/// The ordering of registers matters, as commented in the file doc comment: assumes the -/// calling-convention is SystemV, at the moment. 
-pub(crate) fn create_reg_universe_systemv(flags: &settings::Flags) -> RealRegUniverse { - let mut regs = Vec::<(RealReg, String)>::new(); - let mut allocable_by_class = [None; NUM_REG_CLASSES]; - - let use_pinned_reg = flags.enable_pinned_reg(); - - // XMM registers - let first_fpr = regs.len(); - regs.push((xmm0().to_real_reg(), "%xmm0".into())); - regs.push((xmm1().to_real_reg(), "%xmm1".into())); - regs.push((xmm2().to_real_reg(), "%xmm2".into())); - regs.push((xmm3().to_real_reg(), "%xmm3".into())); - regs.push((xmm4().to_real_reg(), "%xmm4".into())); - regs.push((xmm5().to_real_reg(), "%xmm5".into())); - regs.push((xmm6().to_real_reg(), "%xmm6".into())); - regs.push((xmm7().to_real_reg(), "%xmm7".into())); - regs.push((xmm8().to_real_reg(), "%xmm8".into())); - regs.push((xmm9().to_real_reg(), "%xmm9".into())); - regs.push((xmm10().to_real_reg(), "%xmm10".into())); - regs.push((xmm11().to_real_reg(), "%xmm11".into())); - regs.push((xmm12().to_real_reg(), "%xmm12".into())); - regs.push((xmm13().to_real_reg(), "%xmm13".into())); - regs.push((xmm14().to_real_reg(), "%xmm14".into())); - regs.push((xmm15().to_real_reg(), "%xmm15".into())); - let last_fpr = regs.len() - 1; - - // Integer regs. - let first_gpr = regs.len(); - - // Caller-saved, in the SystemV x86_64 ABI. - regs.push((rsi().to_real_reg(), "%rsi".into())); - regs.push((rdi().to_real_reg(), "%rdi".into())); - regs.push((rax().to_real_reg(), "%rax".into())); - regs.push((rcx().to_real_reg(), "%rcx".into())); - regs.push((rdx().to_real_reg(), "%rdx".into())); - regs.push((r8().to_real_reg(), "%r8".into())); - regs.push((r9().to_real_reg(), "%r9".into())); - regs.push((r10().to_real_reg(), "%r10".into())); - regs.push((r11().to_real_reg(), "%r11".into())); - - // Callee-saved, in the SystemV x86_64 ABI. - regs.push((r12().to_real_reg(), "%r12".into())); - regs.push((r13().to_real_reg(), "%r13".into())); - regs.push((r14().to_real_reg(), "%r14".into())); - - regs.push((rbx().to_real_reg(), "%rbx".into())); - - // Other regs, not available to the allocator. - debug_assert_eq!(r15(), pinned_reg()); - let allocable = if use_pinned_reg { - // The pinned register is not allocatable in this case, so record the length before adding - // it. - let len = regs.len(); - regs.push((r15().to_real_reg(), "%r15/pinned".into())); - len - } else { - regs.push((r15().to_real_reg(), "%r15".into())); - regs.len() - }; - let last_gpr = allocable - 1; - - regs.push((rsp().to_real_reg(), "%rsp".into())); - regs.push((rbp().to_real_reg(), "%rbp".into())); - - allocable_by_class[RegClass::I64.rc_to_usize()] = Some(RegClassInfo { - first: first_gpr, - last: last_gpr, - suggested_scratch: Some(r12().get_index()), - }); - allocable_by_class[RegClass::V128.rc_to_usize()] = Some(RegClassInfo { - first: first_fpr, - last: last_fpr, - suggested_scratch: Some(xmm15().get_index()), - }); - - // Sanity-check: the index passed to the Reg ctor must match the order in the register list. - for (i, reg) in regs.iter().enumerate() { - assert_eq!(i, reg.0.get_index()); +/// Create the register environment for x64. +pub(crate) fn create_reg_env_systemv(flags: &settings::Flags) -> MachineEnv { + fn preg(r: Reg) -> PReg { + r.to_real_reg().unwrap().into() } - RealRegUniverse { - regs, - allocable, - allocable_by_class, + let mut env = MachineEnv { + preferred_regs_by_class: [ + // Preferred GPRs: caller-saved in the SysV ABI. 
+ vec![ + preg(rsi()), + preg(rdi()), + preg(rax()), + preg(rcx()), + preg(rdx()), + preg(r8()), + preg(r9()), + // N.B.: not r10; it is our scratch reg. + preg(r11()), + ], + // Preferred XMMs: all of them. + vec![ + preg(xmm0()), + preg(xmm1()), + preg(xmm2()), + preg(xmm3()), + preg(xmm4()), + preg(xmm5()), + preg(xmm6()), + preg(xmm7()), + preg(xmm8()), + preg(xmm9()), + preg(xmm10()), + preg(xmm11()), + preg(xmm12()), + preg(xmm13()), + preg(xmm14()), + // N.B.: not xmm15; it is our scratch reg. + ], + ], + non_preferred_regs_by_class: [ + // Non-preferred GPRs: callee-saved in the SysV ABI. + vec![preg(rbx()), preg(r12()), preg(r13()), preg(r14())], + // Non-preferred XMMs: none. + vec![], + ], + scratch_by_class: [preg(r10()), preg(xmm15())], + fixed_stack_slots: vec![], + }; + + debug_assert_eq!(r15(), pinned_reg()); + if !flags.enable_pinned_reg() { + env.non_preferred_regs_by_class[0].push(preg(r15())); + } + + env +} + +/// Give the name of a RealReg. +pub fn realreg_name(reg: RealReg) -> &'static str { + let preg = PReg::from(reg); + match preg.class() { + RegClass::Int => match preg.hw_enc() as u8 { + ENC_RAX => "%rax", + ENC_RBX => "%rbx", + ENC_RCX => "%rcx", + ENC_RDX => "%rdx", + ENC_RSI => "%rsi", + ENC_RDI => "%rdi", + ENC_RBP => "%rbp", + ENC_RSP => "%rsp", + ENC_R8 => "%r8", + ENC_R9 => "%r9", + ENC_R10 => "%r10", + ENC_R11 => "%r11", + ENC_R12 => "%r12", + ENC_R13 => "%r13", + ENC_R14 => "%r14", + ENC_R15 => "%r15", + _ => panic!("Invalid PReg: {:?}", preg), + }, + RegClass::Float => match preg.hw_enc() { + 0 => "%xmm0", + 1 => "%xmm1", + 2 => "%xmm2", + 3 => "%xmm3", + 4 => "%xmm4", + 5 => "%xmm5", + 6 => "%xmm6", + 7 => "%xmm7", + 8 => "%xmm8", + 9 => "%xmm9", + 10 => "%xmm10", + 11 => "%xmm11", + 12 => "%xmm12", + 13 => "%xmm13", + 14 => "%xmm14", + 15 => "%xmm15", + _ => panic!("Invalid PReg: {:?}", preg), + }, + } +} + +pub fn show_reg(reg: Reg) -> String { + if let Some(rreg) = reg.to_real_reg() { + realreg_name(rreg).to_string() + } else { + format!("%{:?}", reg) } } /// If `ireg` denotes an I64-classed reg, make a best-effort attempt to show its name at some /// smaller size (4, 2 or 1 bytes). -pub fn show_ireg_sized(reg: Reg, mb_rru: Option<&RealRegUniverse>, size: u8) -> String { - let mut s = reg.show_rru(mb_rru); +pub fn show_ireg_sized(reg: Reg, size: u8) -> String { + let mut s = show_reg(reg); - if reg.get_class() != RegClass::I64 || size == 8 { + if reg.class() != RegClass::Int || size == 8 { // We can't do any better. return s; } @@ -302,3 +313,15 @@ pub fn show_ireg_sized(reg: Reg, mb_rru: Option<&RealRegUniverse>, size: u8) -> s } + +// N.B.: this is not an `impl PrettyPrint for Reg` because it is +// specific to x64; other backends have analogous functions. The +// disambiguation happens statically by virtue of higher-level, +// x64-specific, types calling the right `pretty_print_reg`. (In other +// words, we can't pretty-print a `Reg` all by itself in a build that +// may have multiple backends; but we can pretty-print one as part of +// an x64 Inst or x64 RegMemImm.) +pub fn pretty_print_reg(reg: Reg, size: u8, allocs: &mut AllocationConsumer<'_>) -> String { + let reg = allocs.next(reg); + show_ireg_sized(reg, size) +} diff --git a/cranelift/codegen/src/isa/x64/inst/unwind/systemv.rs b/cranelift/codegen/src/isa/x64/inst/unwind/systemv.rs index 68575a18ed..c28ea3b623 100644 --- a/cranelift/codegen/src/isa/x64/inst/unwind/systemv.rs +++ b/cranelift/codegen/src/isa/x64/inst/unwind/systemv.rs @@ -1,8 +1,8 @@ //! 
Unwind information for System V ABI (x86-64). use crate::isa::unwind::systemv::RegisterMappingError; +use crate::machinst::{Reg, RegClass}; use gimli::{write::CommonInformationEntry, Encoding, Format, Register, X86_64}; -use regalloc::{Reg, RegClass}; /// Creates a new x86-64 common information entry (CIE). pub fn create_cie() -> CommonInformationEntry { @@ -69,14 +69,13 @@ pub fn map_reg(reg: Reg) -> Result { X86_64::XMM15, ]; - match reg.get_class() { - RegClass::I64 => { + match reg.class() { + RegClass::Int => { // x86 GP registers have a weird mapping to DWARF registers, so we use a // lookup table. - Ok(X86_GP_REG_MAP[reg.get_hw_encoding() as usize]) + Ok(X86_GP_REG_MAP[reg.to_real_reg().unwrap().hw_enc() as usize]) } - RegClass::V128 => Ok(X86_XMM_REG_MAP[reg.get_hw_encoding() as usize]), - _ => Err(RegisterMappingError::UnsupportedRegisterBank("class?")), + RegClass::Float => Ok(X86_XMM_REG_MAP[reg.to_real_reg().unwrap().hw_enc() as usize]), } } diff --git a/cranelift/codegen/src/isa/x64/inst/unwind/winx64.rs b/cranelift/codegen/src/isa/x64/inst/unwind/winx64.rs index ffffc5fef5..dc4e6e2b60 100644 --- a/cranelift/codegen/src/isa/x64/inst/unwind/winx64.rs +++ b/cranelift/codegen/src/isa/x64/inst/unwind/winx64.rs @@ -1,16 +1,15 @@ //! Unwind information for Windows x64 ABI. -use regalloc::{Reg, RegClass}; +use crate::machinst::{Reg, RegClass}; pub(crate) struct RegisterMapper; impl crate::isa::unwind::winx64::RegisterMapper for RegisterMapper { fn map(reg: Reg) -> crate::isa::unwind::winx64::MappedRegister { use crate::isa::unwind::winx64::MappedRegister; - match reg.get_class() { - RegClass::I64 => MappedRegister::Int(reg.get_hw_encoding()), - RegClass::V128 => MappedRegister::Xmm(reg.get_hw_encoding()), - _ => unreachable!(), + match reg.class() { + RegClass::Int => MappedRegister::Int(reg.to_real_reg().unwrap().hw_enc()), + RegClass::Float => MappedRegister::Xmm(reg.to_real_reg().unwrap().hw_enc()), } } } diff --git a/cranelift/codegen/src/isa/x64/lower.isle b/cranelift/codegen/src/isa/x64/lower.isle index 80c99cc70f..42c5e70c49 100644 --- a/cranelift/codegen/src/isa/x64/lower.isle +++ b/cranelift/codegen/src/isa/x64/lower.isle @@ -1432,12 +1432,12 @@ ;;;; Rules for `trap` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; (rule (lower (trap code)) - (safepoint (x64_ud2 code))) + (side_effect (x64_ud2 code))) ;;;; Rules for `resumable_trap` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; (rule (lower (resumable_trap code)) - (safepoint (x64_ud2 code))) + (side_effect (x64_ud2 code))) ;;;; Rules for `icmp` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; diff --git a/cranelift/codegen/src/isa/x64/lower.rs b/cranelift/codegen/src/isa/x64/lower.rs index 48ac3173e5..8af0801732 100644 --- a/cranelift/codegen/src/isa/x64/lower.rs +++ b/cranelift/codegen/src/isa/x64/lower.rs @@ -20,7 +20,6 @@ use crate::settings::{Flags, TlsModel}; use alloc::boxed::Box; use alloc::vec::Vec; use log::trace; -use regalloc::{Reg, RegClass, Writable}; use smallvec::SmallVec; use std::convert::TryFrom; use target_lexicon::Triple; @@ -1005,7 +1004,7 @@ fn lower_insn_to_regs>( // simply use the flags here. 
let cc = CC::from_intcc(cond_code); - ctx.emit_safepoint(Inst::TrapIf { trap_code, cc }); + ctx.emit(Inst::TrapIf { trap_code, cc }); } else if op == Opcode::Trapif { let cond_code = ctx.data(insn).cond_code().unwrap(); @@ -1014,7 +1013,7 @@ fn lower_insn_to_regs>( let cond_code = emit_cmp(ctx, ifcmp, cond_code); let cc = CC::from_intcc(cond_code); - ctx.emit_safepoint(Inst::TrapIf { trap_code, cc }); + ctx.emit(Inst::TrapIf { trap_code, cc }); } else { let cond_code = ctx.data(insn).fp_cond_code().unwrap(); @@ -1022,9 +1021,7 @@ fn lower_insn_to_regs>( let ffcmp = matches_input(ctx, inputs[0], Opcode::Ffcmp).unwrap(); match emit_fcmp(ctx, ffcmp, cond_code, FcmpSpec::Normal) { - FcmpCondResult::Condition(cc) => { - ctx.emit_safepoint(Inst::TrapIf { trap_code, cc }) - } + FcmpCondResult::Condition(cc) => ctx.emit(Inst::TrapIf { trap_code, cc }), FcmpCondResult::AndConditions(cc1, cc2) => { // A bit unfortunate, but materialize the flags in their own register, and // check against this. @@ -1038,14 +1035,14 @@ fn lower_insn_to_regs>( RegMemImm::reg(tmp.to_reg()), tmp2, )); - ctx.emit_safepoint(Inst::TrapIf { + ctx.emit(Inst::TrapIf { trap_code, cc: CC::NZ, }); } FcmpCondResult::OrConditions(cc1, cc2) => { - ctx.emit_safepoint(Inst::TrapIf { trap_code, cc: cc1 }); - ctx.emit_safepoint(Inst::TrapIf { trap_code, cc: cc2 }); + ctx.emit(Inst::TrapIf { trap_code, cc: cc1 }); + ctx.emit(Inst::TrapIf { trap_code, cc: cc2 }); } FcmpCondResult::InvertedEqualOrConditions(_, _) => unreachable!(), }; @@ -2917,7 +2914,7 @@ fn lower_insn_to_regs>( let src_ty = ctx.input_ty(insn, 0); debug_assert!(src_ty.is_vector() && src_ty.bits() == 128); let dst = get_output_reg(ctx, outputs[0]).only_reg().unwrap(); - debug_assert!(dst.to_reg().get_class() == RegClass::I64); + debug_assert!(dst.to_reg().class() == RegClass::Int); // The Intel specification allows using both 32-bit and 64-bit GPRs as destination for // the "move mask" instructions. This is controlled by the REX.R bit: "In 64-bit mode, diff --git a/cranelift/codegen/src/isa/x64/lower/isle.rs b/cranelift/codegen/src/isa/x64/lower/isle.rs index 28df19f896..c8032ecde9 100644 --- a/cranelift/codegen/src/isa/x64/lower/isle.rs +++ b/cranelift/codegen/src/isa/x64/lower/isle.rs @@ -2,23 +2,23 @@ // Pull in the ISLE generated code. pub(crate) mod generated_code; +use crate::machinst::{Reg, Writable}; use generated_code::MInst; -use regalloc::Writable; // Types that the generated ISLE code uses via `use super::*`. 
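// (`Reg` and `Writable` now come from `crate::machinst` rather than the
// external `regalloc` crate or `super`; see the adjusted imports below.)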
-use super::{is_int_or_ref_ty, is_mergeable_load, lower_to_amode, Reg}; +use super::{is_int_or_ref_ty, is_mergeable_load, lower_to_amode}; use crate::{ ir::{ condcodes::{FloatCC, IntCC}, immediates::*, types::*, - Inst, InstructionData, MemFlags, Opcode, TrapCode, Value, ValueLabel, ValueList, + Inst, InstructionData, MemFlags, Opcode, TrapCode, Value, ValueList, }, isa::{ settings::Flags, unwind::UnwindInst, x64::{ - inst::{args::*, regs, x64_map_regs}, + inst::{args::*, regs}, settings::Flags as IsaFlags, }, }, @@ -45,15 +45,9 @@ pub(crate) fn lower( where C: LowerCtx, { - lower_common( - lower_ctx, - flags, - isa_flags, - outputs, - inst, - |cx, insn| generated_code::constructor_lower(cx, insn), - x64_map_regs, - ) + lower_common(lower_ctx, flags, isa_flags, outputs, inst, |cx, insn| { + generated_code::constructor_lower(cx, insn) + }) } impl generated_code::Context for IsleContext<'_, C, Flags, IsaFlags, 6> @@ -269,17 +263,7 @@ where } fn emit(&mut self, inst: &MInst) -> Unit { - for inst in inst.clone().mov_mitosis() { - self.emitted_insts.push((inst, false)); - } - } - - fn emit_safepoint(&mut self, inst: &MInst) -> Unit { - use crate::machinst::MachInst; - for inst in inst.clone().mov_mitosis() { - let is_safepoint = !inst.is_move().is_some(); - self.emitted_insts.push((inst, is_safepoint)); - } + self.lower_ctx.emit(inst.clone()); } #[inline] diff --git a/cranelift/codegen/src/isa/x64/lower/isle/generated_code.manifest b/cranelift/codegen/src/isa/x64/lower/isle/generated_code.manifest index ad3cfca1f8..245b41e5c2 100644 --- a/cranelift/codegen/src/isa/x64/lower/isle/generated_code.manifest +++ b/cranelift/codegen/src/isa/x64/lower/isle/generated_code.manifest @@ -1,4 +1,4 @@ src/clif.isle 443b34b797fc8ace -src/prelude.isle c0751050a11e2686 -src/isa/x64/inst.isle 1a4206dba9fcf9d8 -src/isa/x64/lower.isle 7e839e6b667bfe77 +src/prelude.isle afd037c4d91c875c +src/isa/x64/inst.isle f3163ebadf210bb0 +src/isa/x64/lower.isle fd63f3801d58180f diff --git a/cranelift/codegen/src/isa/x64/lower/isle/generated_code.rs b/cranelift/codegen/src/isa/x64/lower/isle/generated_code.rs index 81380bb8ab..b2f9e13902 100644 --- a/cranelift/codegen/src/isa/x64/lower/isle/generated_code.rs +++ b/cranelift/codegen/src/isa/x64/lower/isle/generated_code.rs @@ -79,7 +79,6 @@ pub trait Context { fn def_inst(&mut self, arg0: Value) -> Option; fn offset32_to_u32(&mut self, arg0: Offset32) -> u32; fn emit(&mut self, arg0: &MInst) -> Unit; - fn emit_safepoint(&mut self, arg0: &MInst) -> Unit; fn trap_code_division_by_zero(&mut self) -> TrapCode; fn trap_code_integer_overflow(&mut self) -> TrapCode; fn trap_code_bad_conversion_to_integer(&mut self) -> TrapCode; @@ -162,13 +161,13 @@ pub trait Context { fn popcount_low_mask(&mut self) -> VCodeConstant; } -/// Internal type SideEffectNoResult: defined at src/prelude.isle line 405. +/// Internal type SideEffectNoResult: defined at src/prelude.isle line 402. #[derive(Clone, Debug)] pub enum SideEffectNoResult { Inst { inst: MInst }, } -/// Internal type ProducesFlags: defined at src/prelude.isle line 427. +/// Internal type ProducesFlags: defined at src/prelude.isle line 418. #[derive(Clone, Debug)] pub enum ProducesFlags { ProducesFlagsSideEffect { inst: MInst }, @@ -176,7 +175,7 @@ pub enum ProducesFlags { ProducesFlagsReturnsResultWithConsumer { inst: MInst, result: Reg }, } -/// Internal type ConsumesFlags: defined at src/prelude.isle line 438. +/// Internal type ConsumesFlags: defined at src/prelude.isle line 429. 
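/// A `ConsumesFlags` is paired with a `ProducesFlags` by the
/// `with_flags` constructors below, which emit the producer and the
/// consumer(s) back to back so nothing can clobber the flags register
/// in between. A rough sketch in ISLE terms, for a 128-bit add:
///
/// ```text
/// (with_flags (x64_add_with_flags_paired $I64 a_lo b_lo)
///             (x64_adc_paired $I64 a_hi b_hi))
/// ```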
#[derive(Clone, Debug)] pub enum ConsumesFlags { ConsumesFlagsReturnsResultWithProducer { @@ -234,7 +233,8 @@ pub enum MInst { size: OperandSize, signed: bool, divisor: GprMem, - dividend: Gpr, + dividend_lo: Gpr, + dividend_hi: Gpr, dst_quotient: WritableGpr, dst_remainder: WritableGpr, }, @@ -249,7 +249,8 @@ pub enum MInst { CheckedDivOrRemSeq { kind: DivOrRemKind, size: OperandSize, - dividend: Gpr, + dividend_lo: Gpr, + dividend_hi: Gpr, divisor: WritableGpr, dst_quotient: WritableGpr, dst_remainder: WritableGpr, @@ -437,7 +438,9 @@ pub enum MInst { defs: VecWritableReg, opcode: Opcode, }, - Ret, + Ret { + rets: VecReg, + }, EpiloguePlaceholder, JmpKnown { dst: MachLabel, @@ -505,16 +508,15 @@ pub enum MInst { MachOTlsGetAddr { symbol: ExternalName, }, - ValueLabelMarker { - reg: Reg, - label: ValueLabel, - }, Unwind { inst: UnwindInst, }, + DummyUse { + reg: Reg, + }, } -/// Internal type ExtendKind: defined at src/isa/x64/inst.isle line 1201. +/// Internal type ExtendKind: defined at src/isa/x64/inst.isle line 1202. #[derive(Copy, Clone, PartialEq, Eq, Debug)] pub enum ExtendKind { Sign, @@ -568,7 +570,7 @@ pub fn constructor_side_effect( inst: ref pattern1_0, } = pattern0_0 { - // Rule at src/prelude.isle line 410. + // Rule at src/prelude.isle line 407. let expr0_0 = C::emit(ctx, pattern1_0); let expr1_0 = C::output_none(ctx); return Some(expr1_0); @@ -576,24 +578,6 @@ pub fn constructor_side_effect( return None; } -// Generated as internal constructor for term safepoint. -pub fn constructor_safepoint( - ctx: &mut C, - arg0: &SideEffectNoResult, -) -> Option { - let pattern0_0 = arg0; - if let &SideEffectNoResult::Inst { - inst: ref pattern1_0, - } = pattern0_0 - { - // Rule at src/prelude.isle line 416. - let expr0_0 = C::emit_safepoint(ctx, pattern1_0); - let expr1_0 = C::output_none(ctx); - return Some(expr1_0); - } - return None; -} - // Generated as internal constructor for term produces_flags_get_reg. pub fn constructor_produces_flags_get_reg( ctx: &mut C, @@ -605,7 +589,7 @@ pub fn constructor_produces_flags_get_reg( result: pattern1_1, } = pattern0_0 { - // Rule at src/prelude.isle line 454. + // Rule at src/prelude.isle line 445. return Some(pattern1_1); } return None; @@ -622,7 +606,7 @@ pub fn constructor_produces_flags_ignore( inst: ref pattern1_0, result: pattern1_1, } => { - // Rule at src/prelude.isle line 459. + // Rule at src/prelude.isle line 450. let expr0_0 = ProducesFlags::ProducesFlagsSideEffect { inst: pattern1_0.clone(), }; @@ -632,7 +616,7 @@ pub fn constructor_produces_flags_ignore( inst: ref pattern1_0, result: pattern1_1, } => { - // Rule at src/prelude.isle line 461. + // Rule at src/prelude.isle line 452. let expr0_0 = ProducesFlags::ProducesFlagsSideEffect { inst: pattern1_0.clone(), }; @@ -661,7 +645,7 @@ pub fn constructor_consumes_flags_concat( result: pattern3_1, } = pattern2_0 { - // Rule at src/prelude.isle line 468. + // Rule at src/prelude.isle line 459. let expr0_0 = C::value_regs(ctx, pattern1_1, pattern3_1); let expr1_0 = ConsumesFlags::ConsumesFlagsTwiceReturnsValueRegs { inst1: pattern1_0.clone(), @@ -691,7 +675,7 @@ pub fn constructor_with_flags( inst: ref pattern3_0, result: pattern3_1, } => { - // Rule at src/prelude.isle line 493. + // Rule at src/prelude.isle line 484. let expr0_0 = C::emit(ctx, pattern1_0); let expr1_0 = C::emit(ctx, pattern3_0); let expr2_0 = C::value_reg(ctx, pattern3_1); @@ -702,7 +686,7 @@ pub fn constructor_with_flags( inst2: ref pattern3_1, result: pattern3_2, } => { - // Rule at src/prelude.isle line 499. 
+ // Rule at src/prelude.isle line 490. let expr0_0 = C::emit(ctx, pattern1_0); let expr1_0 = C::emit(ctx, pattern3_0); let expr2_0 = C::emit(ctx, pattern3_1); @@ -715,7 +699,7 @@ pub fn constructor_with_flags( inst4: ref pattern3_3, result: pattern3_4, } => { - // Rule at src/prelude.isle line 511. + // Rule at src/prelude.isle line 502. let expr0_0 = C::emit(ctx, pattern1_0); let expr1_0 = C::emit(ctx, pattern3_0); let expr2_0 = C::emit(ctx, pattern3_1); @@ -736,7 +720,7 @@ pub fn constructor_with_flags( result: pattern3_1, } = pattern2_0 { - // Rule at src/prelude.isle line 487. + // Rule at src/prelude.isle line 478. let expr0_0 = C::emit(ctx, pattern1_0); let expr1_0 = C::emit(ctx, pattern3_0); let expr2_0 = C::value_regs(ctx, pattern1_1, pattern3_1); @@ -756,7 +740,7 @@ pub fn constructor_with_flags_reg( ) -> Option { let pattern0_0 = arg0; let pattern1_0 = arg1; - // Rule at src/prelude.isle line 528. + // Rule at src/prelude.isle line 519. let expr0_0 = constructor_with_flags(ctx, pattern0_0, pattern1_0)?; let expr1_0: usize = 0; let expr2_0 = C::value_regs_get(ctx, expr0_0, expr1_0); @@ -768,22 +752,22 @@ pub fn constructor_operand_size_bits(ctx: &mut C, arg0: &OperandSize let pattern0_0 = arg0; match pattern0_0 { &OperandSize::Size8 => { - // Rule at src/isa/x64/inst.isle line 510. + // Rule at src/isa/x64/inst.isle line 511. let expr0_0: u16 = 8; return Some(expr0_0); } &OperandSize::Size16 => { - // Rule at src/isa/x64/inst.isle line 511. + // Rule at src/isa/x64/inst.isle line 512. let expr0_0: u16 = 16; return Some(expr0_0); } &OperandSize::Size32 => { - // Rule at src/isa/x64/inst.isle line 512. + // Rule at src/isa/x64/inst.isle line 513. let expr0_0: u16 = 32; return Some(expr0_0); } &OperandSize::Size64 => { - // Rule at src/isa/x64/inst.isle line 513. + // Rule at src/isa/x64/inst.isle line 514. let expr0_0: u16 = 64; return Some(expr0_0); } @@ -802,7 +786,7 @@ pub fn constructor_amode_imm_reg_flags( let pattern0_0 = arg0; let pattern1_0 = arg1; let pattern2_0 = arg2; - // Rule at src/isa/x64/inst.isle line 770. + // Rule at src/isa/x64/inst.isle line 771. let expr0_0 = C::amode_imm_reg(ctx, pattern0_0, pattern1_0); let expr1_0 = C::amode_with_flags(ctx, &expr0_0, pattern2_0); return Some(expr1_0); @@ -822,7 +806,7 @@ pub fn constructor_amode_imm_reg_reg_shift_flags( let pattern2_0 = arg2; let pattern3_0 = arg3; let pattern4_0 = arg4; - // Rule at src/isa/x64/inst.isle line 777. + // Rule at src/isa/x64/inst.isle line 778. let expr0_0 = C::amode_imm_reg_reg_shift(ctx, pattern0_0, pattern1_0, pattern2_0, pattern3_0); let expr1_0 = C::amode_with_flags(ctx, &expr0_0, pattern4_0); return Some(expr1_0); @@ -870,7 +854,7 @@ pub fn constructor_to_amode( pattern12_0, pattern13_0, ) { - // Rule at src/isa/x64/inst.isle line 824. + // Rule at src/isa/x64/inst.isle line 825. let expr0_0 = constructor_put_in_gpr(ctx, pattern6_1)?; let expr1_0 = constructor_amode_imm_reg_flags( ctx, @@ -894,7 +878,7 @@ pub fn constructor_to_amode( if let Some(pattern12_0) = C::const_shift_lt_eq_3(ctx, pattern11_1) { let pattern13_0 = arg2; - // Rule at src/isa/x64/inst.isle line 816. + // Rule at src/isa/x64/inst.isle line 817. let expr0_0 = C::offset32_to_u32(ctx, pattern13_0); let expr1_0 = constructor_put_in_gpr(ctx, pattern6_1)?; let expr2_0 = constructor_put_in_gpr(ctx, pattern11_0)?; @@ -946,7 +930,7 @@ pub fn constructor_to_amode( pattern19_0, ) { - // Rule at src/isa/x64/inst.isle line 829. + // Rule at src/isa/x64/inst.isle line 830. 
let expr0_0 = constructor_put_in_gpr( ctx, pattern6_1, )?; @@ -992,7 +976,7 @@ pub fn constructor_to_amode( pattern12_0, pattern13_0, ) { - // Rule at src/isa/x64/inst.isle line 826. + // Rule at src/isa/x64/inst.isle line 827. let expr0_0 = constructor_put_in_gpr(ctx, pattern6_0)?; let expr1_0 = constructor_amode_imm_reg_flags( ctx, @@ -1016,7 +1000,7 @@ pub fn constructor_to_amode( if let Some(pattern12_0) = C::const_shift_lt_eq_3(ctx, pattern11_1) { let pattern13_0 = arg2; - // Rule at src/isa/x64/inst.isle line 818. + // Rule at src/isa/x64/inst.isle line 819. let expr0_0 = C::offset32_to_u32(ctx, pattern13_0); let expr1_0 = constructor_put_in_gpr(ctx, pattern6_0)?; let expr2_0 = constructor_put_in_gpr(ctx, pattern11_0)?; @@ -1068,7 +1052,7 @@ pub fn constructor_to_amode( pattern19_0, ) { - // Rule at src/isa/x64/inst.isle line 831. + // Rule at src/isa/x64/inst.isle line 832. let expr0_0 = constructor_put_in_gpr( ctx, pattern6_0, )?; @@ -1091,7 +1075,7 @@ pub fn constructor_to_amode( } } let pattern7_0 = arg2; - // Rule at src/isa/x64/inst.isle line 834. + // Rule at src/isa/x64/inst.isle line 835. let expr0_0 = C::offset32_to_u32(ctx, pattern7_0); let expr1_0 = constructor_put_in_gpr(ctx, pattern6_0)?; let expr2_0 = constructor_put_in_gpr(ctx, pattern6_1)?; @@ -1104,7 +1088,7 @@ pub fn constructor_to_amode( } } let pattern2_0 = arg2; - // Rule at src/isa/x64/inst.isle line 837. + // Rule at src/isa/x64/inst.isle line 838. let expr0_0 = C::offset32_to_u32(ctx, pattern2_0); let expr1_0 = constructor_put_in_gpr(ctx, pattern1_0)?; let expr2_0 = constructor_amode_imm_reg_flags(ctx, expr0_0, expr1_0, pattern0_0)?; @@ -1114,7 +1098,7 @@ pub fn constructor_to_amode( // Generated as internal constructor for term reg_to_gpr_mem_imm. pub fn constructor_reg_to_gpr_mem_imm(ctx: &mut C, arg0: Reg) -> Option { let pattern0_0 = arg0; - // Rule at src/isa/x64/inst.isle line 1031. + // Rule at src/isa/x64/inst.isle line 1032. let expr0_0 = C::gpr_new(ctx, pattern0_0); let expr1_0 = C::gpr_to_gpr_mem_imm(ctx, expr0_0); return Some(expr1_0); @@ -1123,7 +1107,7 @@ pub fn constructor_reg_to_gpr_mem_imm(ctx: &mut C, arg0: Reg) -> Opt // Generated as internal constructor for term put_in_gpr. pub fn constructor_put_in_gpr(ctx: &mut C, arg0: Value) -> Option { let pattern0_0 = arg0; - // Rule at src/isa/x64/inst.isle line 1038. + // Rule at src/isa/x64/inst.isle line 1039. let expr0_0 = C::put_in_reg(ctx, pattern0_0); let expr1_0 = C::gpr_new(ctx, expr0_0); return Some(expr1_0); @@ -1132,7 +1116,7 @@ pub fn constructor_put_in_gpr(ctx: &mut C, arg0: Value) -> Option(ctx: &mut C, arg0: Value) -> Option { let pattern0_0 = arg0; - // Rule at src/isa/x64/inst.isle line 1045. + // Rule at src/isa/x64/inst.isle line 1046. let expr0_0 = C::put_in_reg_mem(ctx, pattern0_0); let expr1_0 = C::reg_mem_to_gpr_mem(ctx, &expr0_0); return Some(expr1_0); @@ -1141,7 +1125,7 @@ pub fn constructor_put_in_gpr_mem(ctx: &mut C, arg0: Value) -> Optio // Generated as internal constructor for term put_in_gpr_mem_imm. pub fn constructor_put_in_gpr_mem_imm(ctx: &mut C, arg0: Value) -> Option { let pattern0_0 = arg0; - // Rule at src/isa/x64/inst.isle line 1052. + // Rule at src/isa/x64/inst.isle line 1053. let expr0_0 = C::put_in_reg_mem_imm(ctx, pattern0_0); let expr1_0 = C::gpr_mem_imm_new(ctx, &expr0_0); return Some(expr1_0); @@ -1150,7 +1134,7 @@ pub fn constructor_put_in_gpr_mem_imm(ctx: &mut C, arg0: Value) -> O // Generated as internal constructor for term put_in_xmm. 
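// `put_in_xmm` mirrors `put_in_gpr`: put the value in a register, then
// wrap it in the `Xmm` newtype for class-checked uses.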
pub fn constructor_put_in_xmm(ctx: &mut C, arg0: Value) -> Option { let pattern0_0 = arg0; - // Rule at src/isa/x64/inst.isle line 1059. + // Rule at src/isa/x64/inst.isle line 1060. let expr0_0 = C::put_in_reg(ctx, pattern0_0); let expr1_0 = C::xmm_new(ctx, expr0_0); return Some(expr1_0); @@ -1159,7 +1143,7 @@ pub fn constructor_put_in_xmm(ctx: &mut C, arg0: Value) -> Option(ctx: &mut C, arg0: Value) -> Option { let pattern0_0 = arg0; - // Rule at src/isa/x64/inst.isle line 1066. + // Rule at src/isa/x64/inst.isle line 1067. let expr0_0 = C::put_in_reg_mem(ctx, pattern0_0); let expr1_0 = C::reg_mem_to_xmm_mem(ctx, &expr0_0); return Some(expr1_0); @@ -1168,7 +1152,7 @@ pub fn constructor_put_in_xmm_mem(ctx: &mut C, arg0: Value) -> Optio // Generated as internal constructor for term put_in_xmm_mem_imm. pub fn constructor_put_in_xmm_mem_imm(ctx: &mut C, arg0: Value) -> Option { let pattern0_0 = arg0; - // Rule at src/isa/x64/inst.isle line 1073. + // Rule at src/isa/x64/inst.isle line 1074. let expr0_0 = C::put_in_reg_mem_imm(ctx, pattern0_0); let expr1_0 = C::xmm_mem_imm_new(ctx, &expr0_0); return Some(expr1_0); @@ -1177,7 +1161,7 @@ pub fn constructor_put_in_xmm_mem_imm(ctx: &mut C, arg0: Value) -> O // Generated as internal constructor for term output_gpr. pub fn constructor_output_gpr(ctx: &mut C, arg0: Gpr) -> Option { let pattern0_0 = arg0; - // Rule at src/isa/x64/inst.isle line 1078. + // Rule at src/isa/x64/inst.isle line 1079. let expr0_0 = C::gpr_to_reg(ctx, pattern0_0); let expr1_0 = constructor_output_reg(ctx, expr0_0)?; return Some(expr1_0); @@ -1187,7 +1171,7 @@ pub fn constructor_output_gpr(ctx: &mut C, arg0: Gpr) -> Option(ctx: &mut C, arg0: Gpr, arg1: Gpr) -> Option { let pattern0_0 = arg0; let pattern1_0 = arg1; - // Rule at src/isa/x64/inst.isle line 1083. + // Rule at src/isa/x64/inst.isle line 1084. let expr0_0 = C::gpr_to_reg(ctx, pattern0_0); let expr1_0 = C::gpr_to_reg(ctx, pattern1_0); let expr2_0 = C::value_regs(ctx, expr0_0, expr1_0); @@ -1197,7 +1181,7 @@ pub fn constructor_value_gprs(ctx: &mut C, arg0: Gpr, arg1: Gpr) -> // Generated as internal constructor for term output_xmm. pub fn constructor_output_xmm(ctx: &mut C, arg0: Xmm) -> Option { let pattern0_0 = arg0; - // Rule at src/isa/x64/inst.isle line 1088. + // Rule at src/isa/x64/inst.isle line 1089. let expr0_0 = C::xmm_to_reg(ctx, pattern0_0); let expr1_0 = constructor_output_reg(ctx, expr0_0)?; return Some(expr1_0); @@ -1211,7 +1195,7 @@ pub fn constructor_value_regs_get_gpr( ) -> Option { let pattern0_0 = arg0; let pattern1_0 = arg1; - // Rule at src/isa/x64/inst.isle line 1095. + // Rule at src/isa/x64/inst.isle line 1096. let expr0_0 = C::value_regs_get(ctx, pattern0_0, pattern1_0); let expr1_0 = C::gpr_new(ctx, expr0_0); return Some(expr1_0); @@ -1220,7 +1204,7 @@ pub fn constructor_value_regs_get_gpr( // Generated as internal constructor for term lo_gpr. pub fn constructor_lo_gpr(ctx: &mut C, arg0: Value) -> Option { let pattern0_0 = arg0; - // Rule at src/isa/x64/inst.isle line 1108. + // Rule at src/isa/x64/inst.isle line 1109. let expr0_0 = constructor_lo_reg(ctx, pattern0_0)?; let expr1_0 = C::gpr_new(ctx, expr0_0); return Some(expr1_0); @@ -1232,7 +1216,7 @@ pub fn constructor_sink_load_to_gpr_mem_imm( arg0: &SinkableLoad, ) -> Option { let pattern0_0 = arg0; - // Rule at src/isa/x64/inst.isle line 1191. + // Rule at src/isa/x64/inst.isle line 1192. 
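    // Sinking a load folds the matched load into this instruction's
    // memory operand, so no separate load instruction is emitted.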
let expr0_0 = C::sink_load(ctx, pattern0_0); let expr1_0 = C::gpr_mem_imm_new(ctx, &expr0_0); return Some(expr1_0); @@ -1250,12 +1234,12 @@ pub fn constructor_extend_to_gpr( let pattern2_0 = arg1; if pattern2_0 == pattern1_0 { let pattern4_0 = arg2; - // Rule at src/isa/x64/inst.isle line 1213. + // Rule at src/isa/x64/inst.isle line 1214. let expr0_0 = constructor_put_in_gpr(ctx, pattern0_0)?; return Some(expr0_0); } let pattern3_0 = arg2; - // Rule at src/isa/x64/inst.isle line 1216. + // Rule at src/isa/x64/inst.isle line 1217. let expr0_0 = C::ty_bits_u16(ctx, pattern1_0); let expr1_0 = C::operand_size_of_type_32_64(ctx, pattern2_0); let expr2_0 = constructor_operand_size_bits(ctx, &expr1_0)?; @@ -1279,7 +1263,7 @@ pub fn constructor_extend( let pattern2_0 = arg1; let pattern3_0 = arg2; let pattern4_0 = arg3; - // Rule at src/isa/x64/inst.isle line 1236. + // Rule at src/isa/x64/inst.isle line 1237. let expr0_0 = constructor_x64_movsx(ctx, pattern3_0, pattern4_0)?; return Some(expr0_0); } @@ -1287,7 +1271,7 @@ pub fn constructor_extend( let pattern2_0 = arg1; let pattern3_0 = arg2; let pattern4_0 = arg3; - // Rule at src/isa/x64/inst.isle line 1232. + // Rule at src/isa/x64/inst.isle line 1233. let expr0_0 = constructor_x64_movzx(ctx, pattern3_0, pattern4_0)?; return Some(expr0_0); } @@ -1300,17 +1284,17 @@ pub fn constructor_extend( pub fn constructor_sse_xor_op(ctx: &mut C, arg0: Type) -> Option { let pattern0_0 = arg0; if pattern0_0 == F32X4 { - // Rule at src/isa/x64/inst.isle line 1243. + // Rule at src/isa/x64/inst.isle line 1244. let expr0_0 = SseOpcode::Xorps; return Some(expr0_0); } if pattern0_0 == F64X2 { - // Rule at src/isa/x64/inst.isle line 1244. + // Rule at src/isa/x64/inst.isle line 1245. let expr0_0 = SseOpcode::Xorpd; return Some(expr0_0); } if let Some((pattern1_0, pattern1_1)) = C::multi_lane(ctx, pattern0_0) { - // Rule at src/isa/x64/inst.isle line 1245. + // Rule at src/isa/x64/inst.isle line 1246. let expr0_0 = SseOpcode::Pxor; return Some(expr0_0); } @@ -1327,7 +1311,7 @@ pub fn constructor_sse_xor( let pattern0_0 = arg0; let pattern1_0 = arg1; let pattern2_0 = arg2; - // Rule at src/isa/x64/inst.isle line 1249. + // Rule at src/isa/x64/inst.isle line 1250. let expr0_0 = constructor_sse_xor_op(ctx, pattern0_0)?; let expr1_0 = constructor_xmm_rm_r(ctx, pattern0_0, &expr0_0, pattern1_0, pattern2_0)?; return Some(expr1_0); @@ -1337,40 +1321,40 @@ pub fn constructor_sse_xor( pub fn constructor_sse_cmp_op(ctx: &mut C, arg0: Type) -> Option { let pattern0_0 = arg0; if pattern0_0 == F32X4 { - // Rule at src/isa/x64/inst.isle line 1258. + // Rule at src/isa/x64/inst.isle line 1259. let expr0_0 = SseOpcode::Cmpps; return Some(expr0_0); } if pattern0_0 == F64X2 { - // Rule at src/isa/x64/inst.isle line 1259. + // Rule at src/isa/x64/inst.isle line 1260. let expr0_0 = SseOpcode::Cmppd; return Some(expr0_0); } if let Some((pattern1_0, pattern1_1)) = C::multi_lane(ctx, pattern0_0) { if pattern1_0 == 8 { if pattern1_1 == 16 { - // Rule at src/isa/x64/inst.isle line 1254. + // Rule at src/isa/x64/inst.isle line 1255. let expr0_0 = SseOpcode::Pcmpeqb; return Some(expr0_0); } } if pattern1_0 == 16 { if pattern1_1 == 8 { - // Rule at src/isa/x64/inst.isle line 1255. + // Rule at src/isa/x64/inst.isle line 1256. let expr0_0 = SseOpcode::Pcmpeqw; return Some(expr0_0); } } if pattern1_0 == 32 { if pattern1_1 == 4 { - // Rule at src/isa/x64/inst.isle line 1256. + // Rule at src/isa/x64/inst.isle line 1257. 
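                // Four 32-bit lanes: PCMPEQD.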
let expr0_0 = SseOpcode::Pcmpeqd; return Some(expr0_0); } } if pattern1_0 == 64 { if pattern1_1 == 2 { - // Rule at src/isa/x64/inst.isle line 1257. + // Rule at src/isa/x64/inst.isle line 1258. let expr0_0 = SseOpcode::Pcmpeqq; return Some(expr0_0); } @@ -1382,7 +1366,7 @@ pub fn constructor_sse_cmp_op(ctx: &mut C, arg0: Type) -> Option(ctx: &mut C, arg0: Type) -> Option { let pattern0_0 = arg0; - // Rule at src/isa/x64/inst.isle line 1273. + // Rule at src/isa/x64/inst.isle line 1274. let expr0_0 = C::temp_writable_xmm(ctx); let expr1_0: Type = I32X4; let expr2_0 = constructor_sse_cmp_op(ctx, expr1_0)?; @@ -1407,7 +1391,7 @@ pub fn constructor_make_i64x2_from_lanes( ) -> Option { let pattern0_0 = arg0; let pattern1_0 = arg1; - // Rule at src/isa/x64/inst.isle line 1283. + // Rule at src/isa/x64/inst.isle line 1284. let expr0_0 = C::temp_writable_xmm(ctx); let expr1_0 = C::writable_xmm_to_reg(ctx, expr0_0); let expr2_0 = MInst::XmmUninitializedValue { dst: expr0_0 }; @@ -1449,12 +1433,12 @@ pub fn constructor_mov_rmi_to_xmm(ctx: &mut C, arg0: &RegMemImm) -> let pattern0_0 = arg0; match pattern0_0 { &RegMemImm::Imm { simm32: pattern1_0 } => { - // Rule at src/isa/x64/inst.isle line 1304. + // Rule at src/isa/x64/inst.isle line 1305. let expr0_0 = C::xmm_mem_imm_new(ctx, pattern0_0); return Some(expr0_0); } &RegMemImm::Reg { reg: pattern1_0 } => { - // Rule at src/isa/x64/inst.isle line 1305. + // Rule at src/isa/x64/inst.isle line 1306. let expr0_0 = SseOpcode::Movd; let expr1_0 = C::reg_to_gpr_mem(ctx, pattern1_0); let expr2_0 = OperandSize::Size32; @@ -1465,7 +1449,7 @@ pub fn constructor_mov_rmi_to_xmm(ctx: &mut C, arg0: &RegMemImm) -> &RegMemImm::Mem { addr: ref pattern1_0, } => { - // Rule at src/isa/x64/inst.isle line 1303. + // Rule at src/isa/x64/inst.isle line 1304. let expr0_0 = C::xmm_mem_imm_new(ctx, pattern0_0); return Some(expr0_0); } @@ -1485,7 +1469,7 @@ pub fn constructor_x64_load( if pattern0_0 == I64 { let pattern2_0 = arg1; let pattern3_0 = arg2; - // Rule at src/isa/x64/inst.isle line 1319. + // Rule at src/isa/x64/inst.isle line 1320. let expr0_0 = C::temp_writable_gpr(ctx); let expr1_0 = MInst::Mov64MR { src: pattern2_0.clone(), @@ -1498,7 +1482,7 @@ pub fn constructor_x64_load( if pattern0_0 == F32 { let pattern2_0 = arg1; let pattern3_0 = arg2; - // Rule at src/isa/x64/inst.isle line 1324. + // Rule at src/isa/x64/inst.isle line 1325. let expr0_0 = SseOpcode::Movss; let expr1_0 = constructor_synthetic_amode_to_xmm_mem(ctx, pattern2_0)?; let expr2_0 = constructor_xmm_unary_rm_r(ctx, &expr0_0, &expr1_0)?; @@ -1508,7 +1492,7 @@ pub fn constructor_x64_load( if pattern0_0 == F64 { let pattern2_0 = arg1; let pattern3_0 = arg2; - // Rule at src/isa/x64/inst.isle line 1328. + // Rule at src/isa/x64/inst.isle line 1329. let expr0_0 = SseOpcode::Movsd; let expr1_0 = constructor_synthetic_amode_to_xmm_mem(ctx, pattern2_0)?; let expr2_0 = constructor_xmm_unary_rm_r(ctx, &expr0_0, &expr1_0)?; @@ -1518,7 +1502,7 @@ pub fn constructor_x64_load( if pattern0_0 == F32X4 { let pattern2_0 = arg1; let pattern3_0 = arg2; - // Rule at src/isa/x64/inst.isle line 1332. + // Rule at src/isa/x64/inst.isle line 1333. let expr0_0 = SseOpcode::Movups; let expr1_0 = constructor_synthetic_amode_to_xmm_mem(ctx, pattern2_0)?; let expr2_0 = constructor_xmm_unary_rm_r(ctx, &expr0_0, &expr1_0)?; @@ -1528,7 +1512,7 @@ pub fn constructor_x64_load( if pattern0_0 == F64X2 { let pattern2_0 = arg1; let pattern3_0 = arg2; - // Rule at src/isa/x64/inst.isle line 1336. 
+ // Rule at src/isa/x64/inst.isle line 1337. let expr0_0 = SseOpcode::Movupd; let expr1_0 = constructor_synthetic_amode_to_xmm_mem(ctx, pattern2_0)?; let expr2_0 = constructor_xmm_unary_rm_r(ctx, &expr0_0, &expr1_0)?; @@ -1538,7 +1522,7 @@ pub fn constructor_x64_load( if let Some((pattern1_0, pattern1_1)) = C::multi_lane(ctx, pattern0_0) { let pattern2_0 = arg1; let pattern3_0 = arg2; - // Rule at src/isa/x64/inst.isle line 1340. + // Rule at src/isa/x64/inst.isle line 1341. let expr0_0 = SseOpcode::Movdqu; let expr1_0 = constructor_synthetic_amode_to_xmm_mem(ctx, pattern2_0)?; let expr2_0 = constructor_xmm_unary_rm_r(ctx, &expr0_0, &expr1_0)?; @@ -1549,7 +1533,7 @@ pub fn constructor_x64_load( let pattern2_0 = arg1; let pattern3_0 = arg2; if let &ExtKind::SignExtend = pattern3_0 { - // Rule at src/isa/x64/inst.isle line 1315. + // Rule at src/isa/x64/inst.isle line 1316. let expr0_0 = C::ty_bytes(ctx, pattern1_0); let expr1_0: u16 = 8; let expr2_0 = C::ext_mode(ctx, expr0_0, expr1_0); @@ -1565,7 +1549,7 @@ pub fn constructor_x64_load( // Generated as internal constructor for term x64_mov. pub fn constructor_x64_mov(ctx: &mut C, arg0: &Amode) -> Option { let pattern0_0 = arg0; - // Rule at src/isa/x64/inst.isle line 1345. + // Rule at src/isa/x64/inst.isle line 1346. let expr0_0 = C::temp_writable_gpr(ctx); let expr1_0 = C::amode_to_synthetic_amode(ctx, pattern0_0); let expr2_0 = MInst::Mov64MR { @@ -1585,7 +1569,7 @@ pub fn constructor_x64_movzx( ) -> Option { let pattern0_0 = arg0; let pattern1_0 = arg1; - // Rule at src/isa/x64/inst.isle line 1351. + // Rule at src/isa/x64/inst.isle line 1352. let expr0_0 = C::temp_writable_gpr(ctx); let expr1_0 = MInst::MovzxRmR { ext_mode: pattern0_0.clone(), @@ -1605,7 +1589,7 @@ pub fn constructor_x64_movsx( ) -> Option { let pattern0_0 = arg0; let pattern1_0 = arg1; - // Rule at src/isa/x64/inst.isle line 1357. + // Rule at src/isa/x64/inst.isle line 1358. let expr0_0 = C::temp_writable_gpr(ctx); let expr1_0 = MInst::MovsxRmR { ext_mode: pattern0_0.clone(), @@ -1620,7 +1604,7 @@ pub fn constructor_x64_movsx( // Generated as internal constructor for term x64_movss_load. pub fn constructor_x64_movss_load(ctx: &mut C, arg0: &XmmMem) -> Option { let pattern0_0 = arg0; - // Rule at src/isa/x64/inst.isle line 1363. + // Rule at src/isa/x64/inst.isle line 1364. let expr0_0 = SseOpcode::Movss; let expr1_0 = constructor_xmm_unary_rm_r(ctx, &expr0_0, pattern0_0)?; return Some(expr1_0); @@ -1629,7 +1613,7 @@ pub fn constructor_x64_movss_load(ctx: &mut C, arg0: &XmmMem) -> Opt // Generated as internal constructor for term x64_movsd_load. pub fn constructor_x64_movsd_load(ctx: &mut C, arg0: &XmmMem) -> Option { let pattern0_0 = arg0; - // Rule at src/isa/x64/inst.isle line 1367. + // Rule at src/isa/x64/inst.isle line 1368. let expr0_0 = SseOpcode::Movsd; let expr1_0 = constructor_xmm_unary_rm_r(ctx, &expr0_0, pattern0_0)?; return Some(expr1_0); @@ -1638,7 +1622,7 @@ pub fn constructor_x64_movsd_load(ctx: &mut C, arg0: &XmmMem) -> Opt // Generated as internal constructor for term x64_movups. pub fn constructor_x64_movups(ctx: &mut C, arg0: &XmmMem) -> Option { let pattern0_0 = arg0; - // Rule at src/isa/x64/inst.isle line 1371. + // Rule at src/isa/x64/inst.isle line 1372. let expr0_0 = SseOpcode::Movups; let expr1_0 = constructor_xmm_unary_rm_r(ctx, &expr0_0, pattern0_0)?; return Some(expr1_0); @@ -1647,7 +1631,7 @@ pub fn constructor_x64_movups(ctx: &mut C, arg0: &XmmMem) -> Option< // Generated as internal constructor for term x64_movupd. 
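// MOVUPD: unaligned 128-bit load of two packed f64 lanes.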
pub fn constructor_x64_movupd(ctx: &mut C, arg0: &XmmMem) -> Option { let pattern0_0 = arg0; - // Rule at src/isa/x64/inst.isle line 1375. + // Rule at src/isa/x64/inst.isle line 1376. let expr0_0 = SseOpcode::Movupd; let expr1_0 = constructor_xmm_unary_rm_r(ctx, &expr0_0, pattern0_0)?; return Some(expr1_0); @@ -1656,7 +1640,7 @@ pub fn constructor_x64_movupd(ctx: &mut C, arg0: &XmmMem) -> Option< // Generated as internal constructor for term x64_movdqu. pub fn constructor_x64_movdqu(ctx: &mut C, arg0: &XmmMem) -> Option { let pattern0_0 = arg0; - // Rule at src/isa/x64/inst.isle line 1379. + // Rule at src/isa/x64/inst.isle line 1380. let expr0_0 = SseOpcode::Movdqu; let expr1_0 = constructor_xmm_unary_rm_r(ctx, &expr0_0, pattern0_0)?; return Some(expr1_0); @@ -1665,7 +1649,7 @@ pub fn constructor_x64_movdqu(ctx: &mut C, arg0: &XmmMem) -> Option< // Generated as internal constructor for term x64_pmovsxbw. pub fn constructor_x64_pmovsxbw(ctx: &mut C, arg0: &XmmMem) -> Option { let pattern0_0 = arg0; - // Rule at src/isa/x64/inst.isle line 1383. + // Rule at src/isa/x64/inst.isle line 1384. let expr0_0 = SseOpcode::Pmovsxbw; let expr1_0 = constructor_xmm_unary_rm_r(ctx, &expr0_0, pattern0_0)?; return Some(expr1_0); @@ -1674,7 +1658,7 @@ pub fn constructor_x64_pmovsxbw(ctx: &mut C, arg0: &XmmMem) -> Optio // Generated as internal constructor for term x64_pmovzxbw. pub fn constructor_x64_pmovzxbw(ctx: &mut C, arg0: &XmmMem) -> Option { let pattern0_0 = arg0; - // Rule at src/isa/x64/inst.isle line 1387. + // Rule at src/isa/x64/inst.isle line 1388. let expr0_0 = SseOpcode::Pmovzxbw; let expr1_0 = constructor_xmm_unary_rm_r(ctx, &expr0_0, pattern0_0)?; return Some(expr1_0); @@ -1683,7 +1667,7 @@ pub fn constructor_x64_pmovzxbw(ctx: &mut C, arg0: &XmmMem) -> Optio // Generated as internal constructor for term x64_pmovsxwd. pub fn constructor_x64_pmovsxwd(ctx: &mut C, arg0: &XmmMem) -> Option { let pattern0_0 = arg0; - // Rule at src/isa/x64/inst.isle line 1391. + // Rule at src/isa/x64/inst.isle line 1392. let expr0_0 = SseOpcode::Pmovsxwd; let expr1_0 = constructor_xmm_unary_rm_r(ctx, &expr0_0, pattern0_0)?; return Some(expr1_0); @@ -1692,7 +1676,7 @@ pub fn constructor_x64_pmovsxwd(ctx: &mut C, arg0: &XmmMem) -> Optio // Generated as internal constructor for term x64_pmovzxwd. pub fn constructor_x64_pmovzxwd(ctx: &mut C, arg0: &XmmMem) -> Option { let pattern0_0 = arg0; - // Rule at src/isa/x64/inst.isle line 1395. + // Rule at src/isa/x64/inst.isle line 1396. let expr0_0 = SseOpcode::Pmovzxwd; let expr1_0 = constructor_xmm_unary_rm_r(ctx, &expr0_0, pattern0_0)?; return Some(expr1_0); @@ -1701,7 +1685,7 @@ pub fn constructor_x64_pmovzxwd(ctx: &mut C, arg0: &XmmMem) -> Optio // Generated as internal constructor for term x64_pmovsxdq. pub fn constructor_x64_pmovsxdq(ctx: &mut C, arg0: &XmmMem) -> Option { let pattern0_0 = arg0; - // Rule at src/isa/x64/inst.isle line 1399. + // Rule at src/isa/x64/inst.isle line 1400. let expr0_0 = SseOpcode::Pmovsxdq; let expr1_0 = constructor_xmm_unary_rm_r(ctx, &expr0_0, pattern0_0)?; return Some(expr1_0); @@ -1710,7 +1694,7 @@ pub fn constructor_x64_pmovsxdq(ctx: &mut C, arg0: &XmmMem) -> Optio // Generated as internal constructor for term x64_pmovzxdq. pub fn constructor_x64_pmovzxdq(ctx: &mut C, arg0: &XmmMem) -> Option { let pattern0_0 = arg0; - // Rule at src/isa/x64/inst.isle line 1403. + // Rule at src/isa/x64/inst.isle line 1404. 
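    // PMOVZXDQ zero-extends two 32-bit lanes to two 64-bit lanes.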
let expr0_0 = SseOpcode::Pmovzxdq; let expr1_0 = constructor_xmm_unary_rm_r(ctx, &expr0_0, pattern0_0)?; return Some(expr1_0); @@ -1724,7 +1708,7 @@ pub fn constructor_x64_xmm_load_const( ) -> Option { let pattern0_0 = arg0; let pattern1_0 = arg1; - // Rule at src/isa/x64/inst.isle line 1408. + // Rule at src/isa/x64/inst.isle line 1409. let expr0_0 = C::temp_writable_xmm(ctx); let expr1_0 = C::writable_xmm_to_reg(ctx, expr0_0); let expr2_0 = MInst::XmmLoadConst { @@ -1749,7 +1733,7 @@ pub fn constructor_alu_rmi_r( let pattern1_0 = arg1; let pattern2_0 = arg2; let pattern3_0 = arg3; - // Rule at src/isa/x64/inst.isle line 1421. + // Rule at src/isa/x64/inst.isle line 1422. let expr0_0 = C::temp_writable_gpr(ctx); let expr1_0 = C::operand_size_of_type_32_64(ctx, pattern0_0); let expr2_0 = MInst::AluRmiR { @@ -1774,7 +1758,7 @@ pub fn constructor_x64_add( let pattern0_0 = arg0; let pattern1_0 = arg1; let pattern2_0 = arg2; - // Rule at src/isa/x64/inst.isle line 1429. + // Rule at src/isa/x64/inst.isle line 1430. let expr0_0 = AluRmiROpcode::Add; let expr1_0 = constructor_alu_rmi_r(ctx, pattern0_0, &expr0_0, pattern1_0, pattern2_0)?; return Some(expr1_0); @@ -1790,7 +1774,7 @@ pub fn constructor_x64_add_with_flags_paired( let pattern0_0 = arg0; let pattern1_0 = arg1; let pattern2_0 = arg2; - // Rule at src/isa/x64/inst.isle line 1437. + // Rule at src/isa/x64/inst.isle line 1438. let expr0_0 = C::temp_writable_gpr(ctx); let expr1_0 = C::operand_size_of_type_32_64(ctx, pattern0_0); let expr2_0 = AluRmiROpcode::Add; @@ -1819,7 +1803,7 @@ pub fn constructor_x64_adc_paired( let pattern0_0 = arg0; let pattern1_0 = arg1; let pattern2_0 = arg2; - // Rule at src/isa/x64/inst.isle line 1449. + // Rule at src/isa/x64/inst.isle line 1450. let expr0_0 = C::temp_writable_gpr(ctx); let expr1_0 = C::operand_size_of_type_32_64(ctx, pattern0_0); let expr2_0 = AluRmiROpcode::Adc; @@ -1848,7 +1832,7 @@ pub fn constructor_x64_sub( let pattern0_0 = arg0; let pattern1_0 = arg1; let pattern2_0 = arg2; - // Rule at src/isa/x64/inst.isle line 1461. + // Rule at src/isa/x64/inst.isle line 1462. let expr0_0 = AluRmiROpcode::Sub; let expr1_0 = constructor_alu_rmi_r(ctx, pattern0_0, &expr0_0, pattern1_0, pattern2_0)?; return Some(expr1_0); @@ -1864,7 +1848,7 @@ pub fn constructor_x64_sub_with_flags_paired( let pattern0_0 = arg0; let pattern1_0 = arg1; let pattern2_0 = arg2; - // Rule at src/isa/x64/inst.isle line 1469. + // Rule at src/isa/x64/inst.isle line 1470. let expr0_0 = C::temp_writable_gpr(ctx); let expr1_0 = C::operand_size_of_type_32_64(ctx, pattern0_0); let expr2_0 = AluRmiROpcode::Sub; @@ -1893,7 +1877,7 @@ pub fn constructor_x64_sbb_paired( let pattern0_0 = arg0; let pattern1_0 = arg1; let pattern2_0 = arg2; - // Rule at src/isa/x64/inst.isle line 1481. + // Rule at src/isa/x64/inst.isle line 1482. let expr0_0 = C::temp_writable_gpr(ctx); let expr1_0 = C::operand_size_of_type_32_64(ctx, pattern0_0); let expr2_0 = AluRmiROpcode::Sbb; @@ -1922,7 +1906,7 @@ pub fn constructor_x64_mul( let pattern0_0 = arg0; let pattern1_0 = arg1; let pattern2_0 = arg2; - // Rule at src/isa/x64/inst.isle line 1493. + // Rule at src/isa/x64/inst.isle line 1494. let expr0_0 = AluRmiROpcode::Mul; let expr1_0 = constructor_alu_rmi_r(ctx, pattern0_0, &expr0_0, pattern1_0, pattern2_0)?; return Some(expr1_0); @@ -1938,7 +1922,7 @@ pub fn constructor_x64_and( let pattern0_0 = arg0; let pattern1_0 = arg1; let pattern2_0 = arg2; - // Rule at src/isa/x64/inst.isle line 1501. + // Rule at src/isa/x64/inst.isle line 1502. 
let expr0_0 = AluRmiROpcode::And; let expr1_0 = constructor_alu_rmi_r(ctx, pattern0_0, &expr0_0, pattern1_0, pattern2_0)?; return Some(expr1_0); @@ -1954,7 +1938,7 @@ pub fn constructor_x64_and_with_flags_paired( let pattern0_0 = arg0; let pattern1_0 = arg1; let pattern2_0 = arg2; - // Rule at src/isa/x64/inst.isle line 1508. + // Rule at src/isa/x64/inst.isle line 1509. let expr0_0 = C::temp_writable_gpr(ctx); let expr1_0 = C::operand_size_of_type_32_64(ctx, pattern0_0); let expr2_0 = AluRmiROpcode::And; @@ -1979,7 +1963,7 @@ pub fn constructor_x64_or( let pattern0_0 = arg0; let pattern1_0 = arg1; let pattern2_0 = arg2; - // Rule at src/isa/x64/inst.isle line 1519. + // Rule at src/isa/x64/inst.isle line 1520. let expr0_0 = AluRmiROpcode::Or; let expr1_0 = constructor_alu_rmi_r(ctx, pattern0_0, &expr0_0, pattern1_0, pattern2_0)?; return Some(expr1_0); @@ -1995,7 +1979,7 @@ pub fn constructor_x64_xor( let pattern0_0 = arg0; let pattern1_0 = arg1; let pattern2_0 = arg2; - // Rule at src/isa/x64/inst.isle line 1527. + // Rule at src/isa/x64/inst.isle line 1528. let expr0_0 = AluRmiROpcode::Xor; let expr1_0 = constructor_alu_rmi_r(ctx, pattern0_0, &expr0_0, pattern1_0, pattern2_0)?; return Some(expr1_0); @@ -2007,7 +1991,7 @@ pub fn constructor_imm(ctx: &mut C, arg0: Type, arg1: u64) -> Option if pattern0_0 == I64 { let pattern2_0 = arg1; if let Some(pattern3_0) = C::nonzero_u64_fits_in_u32(ctx, pattern2_0) { - // Rule at src/isa/x64/inst.isle line 1567. + // Rule at src/isa/x64/inst.isle line 1568. let expr0_0 = C::temp_writable_gpr(ctx); let expr1_0 = OperandSize::Size32; let expr2_0 = MInst::Imm { @@ -2023,7 +2007,7 @@ pub fn constructor_imm(ctx: &mut C, arg0: Type, arg1: u64) -> Option if pattern0_0 == F32 { let pattern2_0 = arg1; if pattern2_0 == 0 { - // Rule at src/isa/x64/inst.isle line 1596. + // Rule at src/isa/x64/inst.isle line 1597. let expr0_0 = C::temp_writable_xmm(ctx); let expr1_0 = C::writable_xmm_to_xmm(ctx, expr0_0); let expr2_0 = SseOpcode::Xorps; @@ -2038,7 +2022,7 @@ pub fn constructor_imm(ctx: &mut C, arg0: Type, arg1: u64) -> Option let expr6_0 = C::xmm_to_reg(ctx, expr1_0); return Some(expr6_0); } - // Rule at src/isa/x64/inst.isle line 1544. + // Rule at src/isa/x64/inst.isle line 1545. let expr0_0 = SseOpcode::Movd; let expr1_0: Type = I32; let expr2_0 = constructor_imm(ctx, expr1_0, pattern2_0)?; @@ -2051,7 +2035,7 @@ pub fn constructor_imm(ctx: &mut C, arg0: Type, arg1: u64) -> Option if pattern0_0 == F64 { let pattern2_0 = arg1; if pattern2_0 == 0 { - // Rule at src/isa/x64/inst.isle line 1608. + // Rule at src/isa/x64/inst.isle line 1609. let expr0_0 = C::temp_writable_xmm(ctx); let expr1_0 = C::writable_xmm_to_xmm(ctx, expr0_0); let expr2_0 = SseOpcode::Xorpd; @@ -2066,7 +2050,7 @@ pub fn constructor_imm(ctx: &mut C, arg0: Type, arg1: u64) -> Option let expr6_0 = C::xmm_to_reg(ctx, expr1_0); return Some(expr6_0); } - // Rule at src/isa/x64/inst.isle line 1550. + // Rule at src/isa/x64/inst.isle line 1551. let expr0_0 = SseOpcode::Movq; let expr1_0: Type = I64; let expr2_0 = constructor_imm(ctx, expr1_0, pattern2_0)?; @@ -2079,7 +2063,7 @@ pub fn constructor_imm(ctx: &mut C, arg0: Type, arg1: u64) -> Option if let Some((pattern1_0, pattern1_1)) = C::multi_lane(ctx, pattern0_0) { let pattern2_0 = arg1; if pattern2_0 == 0 { - // Rule at src/isa/x64/inst.isle line 1586. + // Rule at src/isa/x64/inst.isle line 1587. 
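            // A zero vector is cheapest as an xor of a fresh register
            // with itself; no constant-pool load is needed.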
let expr0_0 = C::temp_writable_xmm(ctx); let expr1_0 = C::writable_xmm_to_xmm(ctx, expr0_0); let expr2_0 = constructor_sse_xor_op(ctx, pattern0_0)?; @@ -2098,7 +2082,7 @@ pub fn constructor_imm(ctx: &mut C, arg0: Type, arg1: u64) -> Option if let Some(pattern1_0) = C::fits_in_64(ctx, pattern0_0) { let pattern2_0 = arg1; if pattern2_0 == 0 { - // Rule at src/isa/x64/inst.isle line 1573. + // Rule at src/isa/x64/inst.isle line 1574. let expr0_0 = C::temp_writable_gpr(ctx); let expr1_0 = C::writable_gpr_to_gpr(ctx, expr0_0); let expr2_0 = C::operand_size_of_type_32_64(ctx, pattern1_0); @@ -2115,7 +2099,7 @@ pub fn constructor_imm(ctx: &mut C, arg0: Type, arg1: u64) -> Option let expr7_0 = C::gpr_to_reg(ctx, expr1_0); return Some(expr7_0); } - // Rule at src/isa/x64/inst.isle line 1537. + // Rule at src/isa/x64/inst.isle line 1538. let expr0_0 = C::temp_writable_gpr(ctx); let expr1_0 = C::operand_size_of_type_32_64(ctx, pattern1_0); let expr2_0 = MInst::Imm { @@ -2134,7 +2118,7 @@ pub fn constructor_imm(ctx: &mut C, arg0: Type, arg1: u64) -> Option pub fn constructor_imm_i64(ctx: &mut C, arg0: Type, arg1: i64) -> Option { let pattern0_0 = arg0; let pattern1_0 = arg1; - // Rule at src/isa/x64/inst.isle line 1559. + // Rule at src/isa/x64/inst.isle line 1560. let expr0_0 = C::i64_as_u64(ctx, pattern1_0); let expr1_0 = constructor_imm(ctx, pattern0_0, expr0_0)?; return Some(expr1_0); @@ -2152,7 +2136,7 @@ pub fn constructor_shift_r( let pattern1_0 = arg1; let pattern2_0 = arg2; let pattern3_0 = arg3; - // Rule at src/isa/x64/inst.isle line 1621. + // Rule at src/isa/x64/inst.isle line 1622. let expr0_0 = C::temp_writable_gpr(ctx); let expr1_0 = C::raw_operand_size_of_type(ctx, pattern0_0); let expr2_0 = MInst::ShiftR { @@ -2177,7 +2161,7 @@ pub fn constructor_x64_rotl( let pattern0_0 = arg0; let pattern1_0 = arg1; let pattern2_0 = arg2; - // Rule at src/isa/x64/inst.isle line 1631. + // Rule at src/isa/x64/inst.isle line 1632. let expr0_0 = ShiftKind::RotateLeft; let expr1_0 = constructor_shift_r(ctx, pattern0_0, &expr0_0, pattern1_0, pattern2_0)?; return Some(expr1_0); @@ -2193,7 +2177,7 @@ pub fn constructor_x64_rotr( let pattern0_0 = arg0; let pattern1_0 = arg1; let pattern2_0 = arg2; - // Rule at src/isa/x64/inst.isle line 1636. + // Rule at src/isa/x64/inst.isle line 1637. let expr0_0 = ShiftKind::RotateRight; let expr1_0 = constructor_shift_r(ctx, pattern0_0, &expr0_0, pattern1_0, pattern2_0)?; return Some(expr1_0); @@ -2209,7 +2193,7 @@ pub fn constructor_x64_shl( let pattern0_0 = arg0; let pattern1_0 = arg1; let pattern2_0 = arg2; - // Rule at src/isa/x64/inst.isle line 1641. + // Rule at src/isa/x64/inst.isle line 1642. let expr0_0 = ShiftKind::ShiftLeft; let expr1_0 = constructor_shift_r(ctx, pattern0_0, &expr0_0, pattern1_0, pattern2_0)?; return Some(expr1_0); @@ -2225,7 +2209,7 @@ pub fn constructor_x64_shr( let pattern0_0 = arg0; let pattern1_0 = arg1; let pattern2_0 = arg2; - // Rule at src/isa/x64/inst.isle line 1646. + // Rule at src/isa/x64/inst.isle line 1647. let expr0_0 = ShiftKind::ShiftRightLogical; let expr1_0 = constructor_shift_r(ctx, pattern0_0, &expr0_0, pattern1_0, pattern2_0)?; return Some(expr1_0); @@ -2241,7 +2225,7 @@ pub fn constructor_x64_sar( let pattern0_0 = arg0; let pattern1_0 = arg1; let pattern2_0 = arg2; - // Rule at src/isa/x64/inst.isle line 1651. + // Rule at src/isa/x64/inst.isle line 1652. 
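    // SAR: arithmetic (sign-propagating) right shift.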
let expr0_0 = ShiftKind::ShiftRightArithmetic; let expr1_0 = constructor_shift_r(ctx, pattern0_0, &expr0_0, pattern1_0, pattern2_0)?; return Some(expr1_0); @@ -2259,7 +2243,7 @@ pub fn constructor_cmp_rmi_r( let pattern1_0 = arg1; let pattern2_0 = arg2; let pattern3_0 = arg3; - // Rule at src/isa/x64/inst.isle line 1656. + // Rule at src/isa/x64/inst.isle line 1657. let expr0_0 = MInst::CmpRmiR { size: pattern0_0.clone(), opcode: pattern1_0.clone(), @@ -2280,7 +2264,7 @@ pub fn constructor_x64_cmp( let pattern0_0 = arg0; let pattern1_0 = arg1; let pattern2_0 = arg2; - // Rule at src/isa/x64/inst.isle line 1665. + // Rule at src/isa/x64/inst.isle line 1666. let expr0_0 = CmpOpcode::Cmp; let expr1_0 = constructor_cmp_rmi_r(ctx, pattern0_0, &expr0_0, pattern1_0, pattern2_0)?; return Some(expr1_0); @@ -2296,7 +2280,7 @@ pub fn constructor_x64_cmp_imm( let pattern0_0 = arg0; let pattern1_0 = arg1; let pattern2_0 = arg2; - // Rule at src/isa/x64/inst.isle line 1670. + // Rule at src/isa/x64/inst.isle line 1671. let expr0_0 = CmpOpcode::Cmp; let expr1_0 = RegMemImm::Imm { simm32: pattern1_0 }; let expr2_0 = C::gpr_mem_imm_new(ctx, &expr1_0); @@ -2314,7 +2298,7 @@ pub fn constructor_xmm_cmp_rm_r( let pattern0_0 = arg0; let pattern1_0 = arg1; let pattern2_0 = arg2; - // Rule at src/isa/x64/inst.isle line 1675. + // Rule at src/isa/x64/inst.isle line 1676. let expr0_0 = MInst::XmmCmpRmR { op: pattern0_0.clone(), src: pattern1_0.clone(), @@ -2334,7 +2318,7 @@ pub fn constructor_x64_ucomis( let pattern1_0 = C::value_type(ctx, pattern0_0); if pattern1_0 == F32 { let pattern3_0 = arg1; - // Rule at src/isa/x64/inst.isle line 1681. + // Rule at src/isa/x64/inst.isle line 1682. let expr0_0 = SseOpcode::Ucomiss; let expr1_0 = constructor_put_in_xmm(ctx, pattern0_0)?; let expr2_0 = C::xmm_to_xmm_mem(ctx, expr1_0); @@ -2344,7 +2328,7 @@ pub fn constructor_x64_ucomis( } if pattern1_0 == F64 { let pattern3_0 = arg1; - // Rule at src/isa/x64/inst.isle line 1685. + // Rule at src/isa/x64/inst.isle line 1686. let expr0_0 = SseOpcode::Ucomisd; let expr1_0 = constructor_put_in_xmm(ctx, pattern0_0)?; let expr2_0 = C::xmm_to_xmm_mem(ctx, expr1_0); @@ -2365,7 +2349,7 @@ pub fn constructor_x64_test( let pattern0_0 = arg0; let pattern1_0 = arg1; let pattern2_0 = arg2; - // Rule at src/isa/x64/inst.isle line 1690. + // Rule at src/isa/x64/inst.isle line 1691. let expr0_0 = CmpOpcode::Test; let expr1_0 = constructor_cmp_rmi_r(ctx, pattern0_0, &expr0_0, pattern1_0, pattern2_0)?; return Some(expr1_0); @@ -2383,7 +2367,7 @@ pub fn constructor_cmove( let pattern1_0 = arg1; let pattern2_0 = arg2; let pattern3_0 = arg3; - // Rule at src/isa/x64/inst.isle line 1697. + // Rule at src/isa/x64/inst.isle line 1698. let expr0_0 = C::temp_writable_gpr(ctx); let expr1_0 = C::operand_size_of_type_32_64(ctx, pattern0_0); let expr2_0 = MInst::Cmove { @@ -2413,7 +2397,7 @@ pub fn constructor_cmove_xmm( let pattern1_0 = arg1; let pattern2_0 = arg2; let pattern3_0 = arg3; - // Rule at src/isa/x64/inst.isle line 1705. + // Rule at src/isa/x64/inst.isle line 1706. let expr0_0 = C::temp_writable_xmm(ctx); let expr1_0 = C::operand_size_of_type_32_64(ctx, pattern0_0); let expr2_0 = MInst::XmmCmove { @@ -2444,7 +2428,7 @@ pub fn constructor_cmove_from_values( let pattern2_0 = arg1; let pattern3_0 = arg2; let pattern4_0 = arg3; - // Rule at src/isa/x64/inst.isle line 1716. + // Rule at src/isa/x64/inst.isle line 1717. 
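        // For I128, each 64-bit half is selected by its own cmove
        // driven by the same flags.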
let expr0_0 = C::put_in_regs(ctx, pattern3_0); let expr1_0 = C::put_in_regs(ctx, pattern4_0); let expr2_0 = C::temp_writable_gpr(ctx); @@ -2489,7 +2473,7 @@ pub fn constructor_cmove_from_values( let pattern3_0 = arg1; let pattern4_0 = arg2; let pattern5_0 = arg3; - // Rule at src/isa/x64/inst.isle line 1740. + // Rule at src/isa/x64/inst.isle line 1741. let expr0_0 = constructor_put_in_xmm_mem(ctx, pattern4_0)?; let expr1_0 = constructor_put_in_xmm(ctx, pattern5_0)?; let expr2_0 = constructor_cmove_xmm(ctx, pattern2_0, pattern3_0, &expr0_0, expr1_0)?; @@ -2501,7 +2485,7 @@ pub fn constructor_cmove_from_values( let pattern3_0 = arg1; let pattern4_0 = arg2; let pattern5_0 = arg3; - // Rule at src/isa/x64/inst.isle line 1737. + // Rule at src/isa/x64/inst.isle line 1738. let expr0_0 = constructor_put_in_gpr_mem(ctx, pattern4_0)?; let expr1_0 = constructor_put_in_gpr(ctx, pattern5_0)?; let expr2_0 = constructor_cmove(ctx, pattern2_0, pattern3_0, &expr0_0, expr1_0)?; @@ -2525,7 +2509,7 @@ pub fn constructor_cmove_or( let pattern2_0 = arg2; let pattern3_0 = arg3; let pattern4_0 = arg4; - // Rule at src/isa/x64/inst.isle line 1747. + // Rule at src/isa/x64/inst.isle line 1748. let expr0_0 = C::temp_writable_gpr(ctx); let expr1_0 = C::temp_writable_gpr(ctx); let expr2_0 = C::operand_size_of_type_32_64(ctx, pattern0_0); @@ -2567,7 +2551,7 @@ pub fn constructor_cmove_or_xmm( let pattern2_0 = arg2; let pattern3_0 = arg3; let pattern4_0 = arg4; - // Rule at src/isa/x64/inst.isle line 1759. + // Rule at src/isa/x64/inst.isle line 1760. let expr0_0 = C::temp_writable_xmm(ctx); let expr1_0 = C::temp_writable_xmm(ctx); let expr2_0 = C::operand_size_of_type_32_64(ctx, pattern0_0); @@ -2610,7 +2594,7 @@ pub fn constructor_cmove_or_from_values( let pattern3_0 = arg2; let pattern4_0 = arg3; let pattern5_0 = arg4; - // Rule at src/isa/x64/inst.isle line 1774. + // Rule at src/isa/x64/inst.isle line 1775. let expr0_0 = C::put_in_regs(ctx, pattern4_0); let expr1_0 = C::put_in_regs(ctx, pattern5_0); let expr2_0 = C::temp_writable_gpr(ctx); @@ -2682,7 +2666,7 @@ pub fn constructor_cmove_or_from_values( let pattern4_0 = arg2; let pattern5_0 = arg3; let pattern6_0 = arg4; - // Rule at src/isa/x64/inst.isle line 1796. + // Rule at src/isa/x64/inst.isle line 1797. let expr0_0 = constructor_put_in_xmm_mem(ctx, pattern5_0)?; let expr1_0 = constructor_put_in_xmm(ctx, pattern6_0)?; let expr2_0 = constructor_cmove_or_xmm( @@ -2697,7 +2681,7 @@ pub fn constructor_cmove_or_from_values( let pattern4_0 = arg2; let pattern5_0 = arg3; let pattern6_0 = arg4; - // Rule at src/isa/x64/inst.isle line 1793. + // Rule at src/isa/x64/inst.isle line 1794. let expr0_0 = constructor_put_in_gpr_mem(ctx, pattern5_0)?; let expr1_0 = constructor_put_in_gpr(ctx, pattern6_0)?; let expr2_0 = @@ -2711,7 +2695,7 @@ pub fn constructor_cmove_or_from_values( // Generated as internal constructor for term x64_setcc. pub fn constructor_x64_setcc(ctx: &mut C, arg0: &CC) -> Option { let pattern0_0 = arg0; - // Rule at src/isa/x64/inst.isle line 1801. + // Rule at src/isa/x64/inst.isle line 1802. let expr0_0 = C::temp_writable_gpr(ctx); let expr1_0 = MInst::Setcc { cc: pattern0_0.clone(), @@ -2737,7 +2721,7 @@ pub fn constructor_xmm_rm_r( let pattern1_0 = arg1; let pattern2_0 = arg2; let pattern3_0 = arg3; - // Rule at src/isa/x64/inst.isle line 1809. + // Rule at src/isa/x64/inst.isle line 1810. 
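    // dst is a fresh temp; the two-operand SSE form presumably ties it
    // to src1 with a reuse-def operand constraint under regalloc2,
    // instead of the old explicit mov.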
let expr0_0 = C::temp_writable_xmm(ctx); let expr1_0 = MInst::XmmRmR { op: pattern1_0.clone(), @@ -2754,7 +2738,7 @@ pub fn constructor_xmm_rm_r( pub fn constructor_x64_paddb(ctx: &mut C, arg0: Xmm, arg1: &XmmMem) -> Option { let pattern0_0 = arg0; let pattern1_0 = arg1; - // Rule at src/isa/x64/inst.isle line 1816. + // Rule at src/isa/x64/inst.isle line 1817. let expr0_0: Type = I8X16; let expr1_0 = SseOpcode::Paddb; let expr2_0 = constructor_xmm_rm_r(ctx, expr0_0, &expr1_0, pattern0_0, pattern1_0)?; @@ -2765,7 +2749,7 @@ pub fn constructor_x64_paddb(ctx: &mut C, arg0: Xmm, arg1: &XmmMem) pub fn constructor_x64_paddw(ctx: &mut C, arg0: Xmm, arg1: &XmmMem) -> Option { let pattern0_0 = arg0; let pattern1_0 = arg1; - // Rule at src/isa/x64/inst.isle line 1821. + // Rule at src/isa/x64/inst.isle line 1822. let expr0_0: Type = I16X8; let expr1_0 = SseOpcode::Paddw; let expr2_0 = constructor_xmm_rm_r(ctx, expr0_0, &expr1_0, pattern0_0, pattern1_0)?; @@ -2776,7 +2760,7 @@ pub fn constructor_x64_paddw(ctx: &mut C, arg0: Xmm, arg1: &XmmMem) pub fn constructor_x64_paddd(ctx: &mut C, arg0: Xmm, arg1: &XmmMem) -> Option { let pattern0_0 = arg0; let pattern1_0 = arg1; - // Rule at src/isa/x64/inst.isle line 1826. + // Rule at src/isa/x64/inst.isle line 1827. let expr0_0: Type = I32X4; let expr1_0 = SseOpcode::Paddd; let expr2_0 = constructor_xmm_rm_r(ctx, expr0_0, &expr1_0, pattern0_0, pattern1_0)?; @@ -2787,7 +2771,7 @@ pub fn constructor_x64_paddd(ctx: &mut C, arg0: Xmm, arg1: &XmmMem) pub fn constructor_x64_paddq(ctx: &mut C, arg0: Xmm, arg1: &XmmMem) -> Option { let pattern0_0 = arg0; let pattern1_0 = arg1; - // Rule at src/isa/x64/inst.isle line 1831. + // Rule at src/isa/x64/inst.isle line 1832. let expr0_0: Type = I64X2; let expr1_0 = SseOpcode::Paddq; let expr2_0 = constructor_xmm_rm_r(ctx, expr0_0, &expr1_0, pattern0_0, pattern1_0)?; @@ -2798,7 +2782,7 @@ pub fn constructor_x64_paddq(ctx: &mut C, arg0: Xmm, arg1: &XmmMem) pub fn constructor_x64_paddsb(ctx: &mut C, arg0: Xmm, arg1: &XmmMem) -> Option { let pattern0_0 = arg0; let pattern1_0 = arg1; - // Rule at src/isa/x64/inst.isle line 1836. + // Rule at src/isa/x64/inst.isle line 1837. let expr0_0: Type = I8X16; let expr1_0 = SseOpcode::Paddsb; let expr2_0 = constructor_xmm_rm_r(ctx, expr0_0, &expr1_0, pattern0_0, pattern1_0)?; @@ -2809,7 +2793,7 @@ pub fn constructor_x64_paddsb(ctx: &mut C, arg0: Xmm, arg1: &XmmMem) pub fn constructor_x64_paddsw(ctx: &mut C, arg0: Xmm, arg1: &XmmMem) -> Option { let pattern0_0 = arg0; let pattern1_0 = arg1; - // Rule at src/isa/x64/inst.isle line 1841. + // Rule at src/isa/x64/inst.isle line 1842. let expr0_0: Type = I16X8; let expr1_0 = SseOpcode::Paddsw; let expr2_0 = constructor_xmm_rm_r(ctx, expr0_0, &expr1_0, pattern0_0, pattern1_0)?; @@ -2820,7 +2804,7 @@ pub fn constructor_x64_paddsw(ctx: &mut C, arg0: Xmm, arg1: &XmmMem) pub fn constructor_x64_paddusb(ctx: &mut C, arg0: Xmm, arg1: &XmmMem) -> Option { let pattern0_0 = arg0; let pattern1_0 = arg1; - // Rule at src/isa/x64/inst.isle line 1846. + // Rule at src/isa/x64/inst.isle line 1847. let expr0_0: Type = I8X16; let expr1_0 = SseOpcode::Paddusb; let expr2_0 = constructor_xmm_rm_r(ctx, expr0_0, &expr1_0, pattern0_0, pattern1_0)?; @@ -2831,7 +2815,7 @@ pub fn constructor_x64_paddusb(ctx: &mut C, arg0: Xmm, arg1: &XmmMem pub fn constructor_x64_paddusw(ctx: &mut C, arg0: Xmm, arg1: &XmmMem) -> Option { let pattern0_0 = arg0; let pattern1_0 = arg1; - // Rule at src/isa/x64/inst.isle line 1851. + // Rule at src/isa/x64/inst.isle line 1852. 
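    // PADDUSW: unsigned saturating add of 16-bit lanes.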
let expr0_0: Type = I16X8; let expr1_0 = SseOpcode::Paddusw; let expr2_0 = constructor_xmm_rm_r(ctx, expr0_0, &expr1_0, pattern0_0, pattern1_0)?; @@ -2842,7 +2826,7 @@ pub fn constructor_x64_paddusw(ctx: &mut C, arg0: Xmm, arg1: &XmmMem pub fn constructor_x64_psubb(ctx: &mut C, arg0: Xmm, arg1: &XmmMem) -> Option { let pattern0_0 = arg0; let pattern1_0 = arg1; - // Rule at src/isa/x64/inst.isle line 1856. + // Rule at src/isa/x64/inst.isle line 1857. let expr0_0: Type = I8X16; let expr1_0 = SseOpcode::Psubb; let expr2_0 = constructor_xmm_rm_r(ctx, expr0_0, &expr1_0, pattern0_0, pattern1_0)?; @@ -2853,7 +2837,7 @@ pub fn constructor_x64_psubb(ctx: &mut C, arg0: Xmm, arg1: &XmmMem) pub fn constructor_x64_psubw(ctx: &mut C, arg0: Xmm, arg1: &XmmMem) -> Option { let pattern0_0 = arg0; let pattern1_0 = arg1; - // Rule at src/isa/x64/inst.isle line 1861. + // Rule at src/isa/x64/inst.isle line 1862. let expr0_0: Type = I16X8; let expr1_0 = SseOpcode::Psubw; let expr2_0 = constructor_xmm_rm_r(ctx, expr0_0, &expr1_0, pattern0_0, pattern1_0)?; @@ -2864,7 +2848,7 @@ pub fn constructor_x64_psubw(ctx: &mut C, arg0: Xmm, arg1: &XmmMem) pub fn constructor_x64_psubd(ctx: &mut C, arg0: Xmm, arg1: &XmmMem) -> Option { let pattern0_0 = arg0; let pattern1_0 = arg1; - // Rule at src/isa/x64/inst.isle line 1866. + // Rule at src/isa/x64/inst.isle line 1867. let expr0_0: Type = I32X4; let expr1_0 = SseOpcode::Psubd; let expr2_0 = constructor_xmm_rm_r(ctx, expr0_0, &expr1_0, pattern0_0, pattern1_0)?; @@ -2875,7 +2859,7 @@ pub fn constructor_x64_psubd(ctx: &mut C, arg0: Xmm, arg1: &XmmMem) pub fn constructor_x64_psubq(ctx: &mut C, arg0: Xmm, arg1: &XmmMem) -> Option { let pattern0_0 = arg0; let pattern1_0 = arg1; - // Rule at src/isa/x64/inst.isle line 1871. + // Rule at src/isa/x64/inst.isle line 1872. let expr0_0: Type = I64X2; let expr1_0 = SseOpcode::Psubq; let expr2_0 = constructor_xmm_rm_r(ctx, expr0_0, &expr1_0, pattern0_0, pattern1_0)?; @@ -2886,7 +2870,7 @@ pub fn constructor_x64_psubq(ctx: &mut C, arg0: Xmm, arg1: &XmmMem) pub fn constructor_x64_psubsb(ctx: &mut C, arg0: Xmm, arg1: &XmmMem) -> Option { let pattern0_0 = arg0; let pattern1_0 = arg1; - // Rule at src/isa/x64/inst.isle line 1876. + // Rule at src/isa/x64/inst.isle line 1877. let expr0_0: Type = I8X16; let expr1_0 = SseOpcode::Psubsb; let expr2_0 = constructor_xmm_rm_r(ctx, expr0_0, &expr1_0, pattern0_0, pattern1_0)?; @@ -2897,7 +2881,7 @@ pub fn constructor_x64_psubsb(ctx: &mut C, arg0: Xmm, arg1: &XmmMem) pub fn constructor_x64_psubsw(ctx: &mut C, arg0: Xmm, arg1: &XmmMem) -> Option { let pattern0_0 = arg0; let pattern1_0 = arg1; - // Rule at src/isa/x64/inst.isle line 1881. + // Rule at src/isa/x64/inst.isle line 1882. let expr0_0: Type = I16X8; let expr1_0 = SseOpcode::Psubsw; let expr2_0 = constructor_xmm_rm_r(ctx, expr0_0, &expr1_0, pattern0_0, pattern1_0)?; @@ -2908,7 +2892,7 @@ pub fn constructor_x64_psubsw(ctx: &mut C, arg0: Xmm, arg1: &XmmMem) pub fn constructor_x64_psubusb(ctx: &mut C, arg0: Xmm, arg1: &XmmMem) -> Option { let pattern0_0 = arg0; let pattern1_0 = arg1; - // Rule at src/isa/x64/inst.isle line 1886. + // Rule at src/isa/x64/inst.isle line 1887. 
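    // PSUBUSB: unsigned saturating subtract of 8-bit lanes.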
     let expr0_0: Type = I8X16;
     let expr1_0 = SseOpcode::Psubusb;
     let expr2_0 = constructor_xmm_rm_r(ctx, expr0_0, &expr1_0, pattern0_0, pattern1_0)?;
@@ -2919,7 +2903,7 @@ pub fn constructor_x64_psubusb<C: Context>(ctx: &mut C, arg0: Xmm, arg1: &XmmMem
 pub fn constructor_x64_psubusw<C: Context>(ctx: &mut C, arg0: Xmm, arg1: &XmmMem) -> Option<Xmm> {
     let pattern0_0 = arg0;
     let pattern1_0 = arg1;
-    // Rule at src/isa/x64/inst.isle line 1891.
+    // Rule at src/isa/x64/inst.isle line 1892.
     let expr0_0: Type = I16X8;
     let expr1_0 = SseOpcode::Psubusw;
     let expr2_0 = constructor_xmm_rm_r(ctx, expr0_0, &expr1_0, pattern0_0, pattern1_0)?;
@@ -2930,7 +2914,7 @@ pub fn constructor_x64_psubusw<C: Context>(ctx: &mut C, arg0: Xmm, arg1: &XmmMem
 pub fn constructor_x64_pavgb<C: Context>(ctx: &mut C, arg0: Xmm, arg1: &XmmMem) -> Option<Xmm> {
     let pattern0_0 = arg0;
     let pattern1_0 = arg1;
-    // Rule at src/isa/x64/inst.isle line 1896.
+    // Rule at src/isa/x64/inst.isle line 1897.
     let expr0_0: Type = I8X16;
     let expr1_0 = SseOpcode::Pavgb;
     let expr2_0 = constructor_xmm_rm_r(ctx, expr0_0, &expr1_0, pattern0_0, pattern1_0)?;
@@ -2941,7 +2925,7 @@ pub fn constructor_x64_pavgb<C: Context>(ctx: &mut C, arg0: Xmm, arg1: &XmmMem
 pub fn constructor_x64_pavgw<C: Context>(ctx: &mut C, arg0: Xmm, arg1: &XmmMem) -> Option<Xmm> {
     let pattern0_0 = arg0;
     let pattern1_0 = arg1;
-    // Rule at src/isa/x64/inst.isle line 1901.
+    // Rule at src/isa/x64/inst.isle line 1902.
     let expr0_0: Type = I16X8;
     let expr1_0 = SseOpcode::Pavgw;
     let expr2_0 = constructor_xmm_rm_r(ctx, expr0_0, &expr1_0, pattern0_0, pattern1_0)?;
@@ -2952,7 +2936,7 @@ pub fn constructor_x64_pavgw<C: Context>(ctx: &mut C, arg0: Xmm, arg1: &XmmMem
 pub fn constructor_x64_pand<C: Context>(ctx: &mut C, arg0: Xmm, arg1: &XmmMem) -> Option<Xmm> {
     let pattern0_0 = arg0;
     let pattern1_0 = arg1;
-    // Rule at src/isa/x64/inst.isle line 1906.
+    // Rule at src/isa/x64/inst.isle line 1907.
     let expr0_0: Type = F32X4;
     let expr1_0 = SseOpcode::Pand;
     let expr2_0 = constructor_xmm_rm_r(ctx, expr0_0, &expr1_0, pattern0_0, pattern1_0)?;
@@ -2963,7 +2947,7 @@ pub fn constructor_x64_pand<C: Context>(ctx: &mut C, arg0: Xmm, arg1: &XmmMem) -
 pub fn constructor_x64_andps<C: Context>(ctx: &mut C, arg0: Xmm, arg1: &XmmMem) -> Option<Xmm> {
     let pattern0_0 = arg0;
     let pattern1_0 = arg1;
-    // Rule at src/isa/x64/inst.isle line 1911.
+    // Rule at src/isa/x64/inst.isle line 1912.
     let expr0_0: Type = F32X4;
     let expr1_0 = SseOpcode::Andps;
     let expr2_0 = constructor_xmm_rm_r(ctx, expr0_0, &expr1_0, pattern0_0, pattern1_0)?;
@@ -2974,7 +2958,7 @@ pub fn constructor_x64_andps<C: Context>(ctx: &mut C, arg0: Xmm, arg1: &XmmMem)
 pub fn constructor_x64_andpd<C: Context>(ctx: &mut C, arg0: Xmm, arg1: &XmmMem) -> Option<Xmm> {
     let pattern0_0 = arg0;
     let pattern1_0 = arg1;
-    // Rule at src/isa/x64/inst.isle line 1916.
+    // Rule at src/isa/x64/inst.isle line 1917.
     let expr0_0: Type = F64X2;
     let expr1_0 = SseOpcode::Andpd;
     let expr2_0 = constructor_xmm_rm_r(ctx, expr0_0, &expr1_0, pattern0_0, pattern1_0)?;
@@ -2985,7 +2969,7 @@ pub fn constructor_x64_andpd<C: Context>(ctx: &mut C, arg0: Xmm, arg1: &XmmMem)
 pub fn constructor_x64_por<C: Context>(ctx: &mut C, arg0: Xmm, arg1: &XmmMem) -> Option<Xmm> {
     let pattern0_0 = arg0;
     let pattern1_0 = arg1;
-    // Rule at src/isa/x64/inst.isle line 1921.
+    // Rule at src/isa/x64/inst.isle line 1922.
     let expr0_0: Type = F32X4;
     let expr1_0 = SseOpcode::Por;
     let expr2_0 = constructor_xmm_rm_r(ctx, expr0_0, &expr1_0, pattern0_0, pattern1_0)?;
@@ -2996,7 +2980,7 @@ pub fn constructor_x64_por<C: Context>(ctx: &mut C, arg0: Xmm, arg1: &XmmMem) ->
 pub fn constructor_x64_orps<C: Context>(ctx: &mut C, arg0: Xmm, arg1: &XmmMem) -> Option<Xmm> {
     let pattern0_0 = arg0;
     let pattern1_0 = arg1;
-    // Rule at src/isa/x64/inst.isle line 1926.
+    // Rule at src/isa/x64/inst.isle line 1927.
     let expr0_0: Type = F32X4;
     let expr1_0 = SseOpcode::Orps;
     let expr2_0 = constructor_xmm_rm_r(ctx, expr0_0, &expr1_0, pattern0_0, pattern1_0)?;
@@ -3007,7 +2991,7 @@ pub fn constructor_x64_orps<C: Context>(ctx: &mut C, arg0: Xmm, arg1: &XmmMem) -
 pub fn constructor_x64_orpd<C: Context>(ctx: &mut C, arg0: Xmm, arg1: &XmmMem) -> Option<Xmm> {
     let pattern0_0 = arg0;
     let pattern1_0 = arg1;
-    // Rule at src/isa/x64/inst.isle line 1931.
+    // Rule at src/isa/x64/inst.isle line 1932.
     let expr0_0: Type = F64X2;
     let expr1_0 = SseOpcode::Orpd;
     let expr2_0 = constructor_xmm_rm_r(ctx, expr0_0, &expr1_0, pattern0_0, pattern1_0)?;
@@ -3018,7 +3002,7 @@ pub fn constructor_x64_orpd<C: Context>(ctx: &mut C, arg0: Xmm, arg1: &XmmMem) -
 pub fn constructor_x64_pxor<C: Context>(ctx: &mut C, arg0: Xmm, arg1: &XmmMem) -> Option<Xmm> {
     let pattern0_0 = arg0;
     let pattern1_0 = arg1;
-    // Rule at src/isa/x64/inst.isle line 1936.
+    // Rule at src/isa/x64/inst.isle line 1937.
     let expr0_0: Type = I8X16;
     let expr1_0 = SseOpcode::Pxor;
     let expr2_0 = constructor_xmm_rm_r(ctx, expr0_0, &expr1_0, pattern0_0, pattern1_0)?;
@@ -3029,7 +3013,7 @@ pub fn constructor_x64_pxor<C: Context>(ctx: &mut C, arg0: Xmm, arg1: &XmmMem) -
 pub fn constructor_x64_xorps<C: Context>(ctx: &mut C, arg0: Xmm, arg1: &XmmMem) -> Option<Xmm> {
     let pattern0_0 = arg0;
     let pattern1_0 = arg1;
-    // Rule at src/isa/x64/inst.isle line 1941.
+    // Rule at src/isa/x64/inst.isle line 1942.
     let expr0_0: Type = F32X4;
     let expr1_0 = SseOpcode::Xorps;
     let expr2_0 = constructor_xmm_rm_r(ctx, expr0_0, &expr1_0, pattern0_0, pattern1_0)?;
@@ -3040,7 +3024,7 @@ pub fn constructor_x64_xorps<C: Context>(ctx: &mut C, arg0: Xmm, arg1: &XmmMem)
 pub fn constructor_x64_xorpd<C: Context>(ctx: &mut C, arg0: Xmm, arg1: &XmmMem) -> Option<Xmm> {
     let pattern0_0 = arg0;
     let pattern1_0 = arg1;
-    // Rule at src/isa/x64/inst.isle line 1946.
+    // Rule at src/isa/x64/inst.isle line 1947.
     let expr0_0: Type = F64X2;
     let expr1_0 = SseOpcode::Xorpd;
     let expr2_0 = constructor_xmm_rm_r(ctx, expr0_0, &expr1_0, pattern0_0, pattern1_0)?;
@@ -3051,7 +3035,7 @@ pub fn constructor_x64_xorpd<C: Context>(ctx: &mut C, arg0: Xmm, arg1: &XmmMem)
 pub fn constructor_x64_pmullw<C: Context>(ctx: &mut C, arg0: Xmm, arg1: &XmmMem) -> Option<Xmm> {
     let pattern0_0 = arg0;
     let pattern1_0 = arg1;
-    // Rule at src/isa/x64/inst.isle line 1951.
+    // Rule at src/isa/x64/inst.isle line 1952.
     let expr0_0: Type = I16X8;
     let expr1_0 = SseOpcode::Pmullw;
     let expr2_0 = constructor_xmm_rm_r(ctx, expr0_0, &expr1_0, pattern0_0, pattern1_0)?;
@@ -3062,7 +3046,7 @@ pub fn constructor_x64_pmullw<C: Context>(ctx: &mut C, arg0: Xmm, arg1: &XmmMem
 pub fn constructor_x64_pmulld<C: Context>(ctx: &mut C, arg0: Xmm, arg1: &XmmMem) -> Option<Xmm> {
     let pattern0_0 = arg0;
     let pattern1_0 = arg1;
-    // Rule at src/isa/x64/inst.isle line 1956.
+    // Rule at src/isa/x64/inst.isle line 1957.
     let expr0_0: Type = I16X8;
     let expr1_0 = SseOpcode::Pmulld;
     let expr2_0 = constructor_xmm_rm_r(ctx, expr0_0, &expr1_0, pattern0_0, pattern1_0)?;
@@ -3073,7 +3057,7 @@ pub fn constructor_x64_pmulld<C: Context>(ctx: &mut C, arg0: Xmm, arg1: &XmmMem
 pub fn constructor_x64_pmulhw<C: Context>(ctx: &mut C, arg0: Xmm, arg1: &XmmMem) -> Option<Xmm> {
     let pattern0_0 = arg0;
     let pattern1_0 = arg1;
-    // Rule at src/isa/x64/inst.isle line 1961.
+    // Rule at src/isa/x64/inst.isle line 1962.
     let expr0_0: Type = I16X8;
     let expr1_0 = SseOpcode::Pmulhw;
     let expr2_0 = constructor_xmm_rm_r(ctx, expr0_0, &expr1_0, pattern0_0, pattern1_0)?;
@@ -3084,7 +3068,7 @@ pub fn constructor_x64_pmulhw<C: Context>(ctx: &mut C, arg0: Xmm, arg1: &XmmMem
 pub fn constructor_x64_pmulhuw<C: Context>(ctx: &mut C, arg0: Xmm, arg1: &XmmMem) -> Option<Xmm> {
     let pattern0_0 = arg0;
     let pattern1_0 = arg1;
-    // Rule at src/isa/x64/inst.isle line 1966.
+    // Rule at src/isa/x64/inst.isle line 1967.
     let expr0_0: Type = I16X8;
     let expr1_0 = SseOpcode::Pmulhuw;
     let expr2_0 = constructor_xmm_rm_r(ctx, expr0_0, &expr1_0, pattern0_0, pattern1_0)?;
@@ -3095,7 +3079,7 @@ pub fn constructor_x64_pmulhuw<C: Context>(ctx: &mut C, arg0: Xmm, arg1: &XmmMem
 pub fn constructor_x64_pmuldq<C: Context>(ctx: &mut C, arg0: Xmm, arg1: &XmmMem) -> Option<Xmm> {
     let pattern0_0 = arg0;
     let pattern1_0 = arg1;
-    // Rule at src/isa/x64/inst.isle line 1971.
+    // Rule at src/isa/x64/inst.isle line 1972.
     let expr0_0: Type = I16X8;
     let expr1_0 = SseOpcode::Pmuldq;
     let expr2_0 = constructor_xmm_rm_r(ctx, expr0_0, &expr1_0, pattern0_0, pattern1_0)?;
@@ -3106,7 +3090,7 @@ pub fn constructor_x64_pmuldq<C: Context>(ctx: &mut C, arg0: Xmm, arg1: &XmmMem
 pub fn constructor_x64_pmuludq<C: Context>(ctx: &mut C, arg0: Xmm, arg1: &XmmMem) -> Option<Xmm> {
     let pattern0_0 = arg0;
     let pattern1_0 = arg1;
-    // Rule at src/isa/x64/inst.isle line 1976.
+    // Rule at src/isa/x64/inst.isle line 1977.
     let expr0_0: Type = I64X2;
     let expr1_0 = SseOpcode::Pmuludq;
     let expr2_0 = constructor_xmm_rm_r(ctx, expr0_0, &expr1_0, pattern0_0, pattern1_0)?;
@@ -3117,7 +3101,7 @@ pub fn constructor_x64_pmuludq<C: Context>(ctx: &mut C, arg0: Xmm, arg1: &XmmMem
 pub fn constructor_x64_punpckhwd<C: Context>(ctx: &mut C, arg0: Xmm, arg1: &XmmMem) -> Option<Xmm> {
     let pattern0_0 = arg0;
     let pattern1_0 = arg1;
-    // Rule at src/isa/x64/inst.isle line 1981.
+    // Rule at src/isa/x64/inst.isle line 1982.
     let expr0_0: Type = I16X8;
     let expr1_0 = SseOpcode::Punpckhwd;
     let expr2_0 = constructor_xmm_rm_r(ctx, expr0_0, &expr1_0, pattern0_0, pattern1_0)?;
@@ -3128,7 +3112,7 @@ pub fn constructor_x64_punpckhwd<C: Context>(ctx: &mut C, arg0: Xmm, arg1: &XmmM
 pub fn constructor_x64_punpcklwd<C: Context>(ctx: &mut C, arg0: Xmm, arg1: &XmmMem) -> Option<Xmm> {
     let pattern0_0 = arg0;
     let pattern1_0 = arg1;
-    // Rule at src/isa/x64/inst.isle line 1986.
+    // Rule at src/isa/x64/inst.isle line 1987.
     let expr0_0: Type = I16X8;
     let expr1_0 = SseOpcode::Punpcklwd;
     let expr2_0 = constructor_xmm_rm_r(ctx, expr0_0, &expr1_0, pattern0_0, pattern1_0)?;
@@ -3139,7 +3123,7 @@ pub fn constructor_x64_punpcklwd<C: Context>(ctx: &mut C, arg0: Xmm, arg1: &XmmM
 pub fn constructor_x64_andnps<C: Context>(ctx: &mut C, arg0: Xmm, arg1: &XmmMem) -> Option<Xmm> {
     let pattern0_0 = arg0;
     let pattern1_0 = arg1;
-    // Rule at src/isa/x64/inst.isle line 1991.
+    // Rule at src/isa/x64/inst.isle line 1992.
     let expr0_0: Type = F32X4;
     let expr1_0 = SseOpcode::Andnps;
     let expr2_0 = constructor_xmm_rm_r(ctx, expr0_0, &expr1_0, pattern0_0, pattern1_0)?;
@@ -3150,7 +3134,7 @@ pub fn constructor_x64_andnps<C: Context>(ctx: &mut C, arg0: Xmm, arg1: &XmmMem)
 pub fn constructor_x64_andnpd<C: Context>(ctx: &mut C, arg0: Xmm, arg1: &XmmMem) -> Option<Xmm> {
     let pattern0_0 = arg0;
     let pattern1_0 = arg1;
-    // Rule at src/isa/x64/inst.isle line 1996.
+    // Rule at src/isa/x64/inst.isle line 1997.
     let expr0_0: Type = F64X2;
     let expr1_0 = SseOpcode::Andnpd;
     let expr2_0 = constructor_xmm_rm_r(ctx, expr0_0, &expr1_0, pattern0_0, pattern1_0)?;
@@ -3161,7 +3145,7 @@ pub fn constructor_x64_andnpd<C: Context>(ctx: &mut C, arg0: Xmm, arg1: &XmmMem)
 pub fn constructor_x64_pandn<C: Context>(ctx: &mut C, arg0: Xmm, arg1: &XmmMem) -> Option<Xmm> {
     let pattern0_0 = arg0;
     let pattern1_0 = arg1;
-    // Rule at src/isa/x64/inst.isle line 2001.
+    // Rule at src/isa/x64/inst.isle line 2002.
     let expr0_0: Type = F64X2;
     let expr1_0 = SseOpcode::Pandn;
     let expr2_0 = constructor_xmm_rm_r(ctx, expr0_0, &expr1_0, pattern0_0, pattern1_0)?;
@@ -3172,7 +3156,7 @@ pub fn constructor_x64_pandn<C: Context>(ctx: &mut C, arg0: Xmm, arg1: &XmmMem)
 pub fn constructor_x64_addss<C: Context>(ctx: &mut C, arg0: Xmm, arg1: &XmmMem) -> Option<Xmm> {
     let pattern0_0 = arg0;
     let pattern1_0 = arg1;
-    // Rule at src/isa/x64/inst.isle line 2006.
+    // Rule at src/isa/x64/inst.isle line 2007.
     let expr0_0: Type = F32;
     let expr1_0 = SseOpcode::Addss;
     let expr2_0 = constructor_xmm_rm_r(ctx, expr0_0, &expr1_0, pattern0_0, pattern1_0)?;
@@ -3183,7 +3167,7 @@ pub fn constructor_x64_addss<C: Context>(ctx: &mut C, arg0: Xmm, arg1: &XmmMem)
 pub fn constructor_x64_addsd<C: Context>(ctx: &mut C, arg0: Xmm, arg1: &XmmMem) -> Option<Xmm> {
     let pattern0_0 = arg0;
     let pattern1_0 = arg1;
-    // Rule at src/isa/x64/inst.isle line 2011.
+    // Rule at src/isa/x64/inst.isle line 2012.
     let expr0_0: Type = F64;
     let expr1_0 = SseOpcode::Addsd;
     let expr2_0 = constructor_xmm_rm_r(ctx, expr0_0, &expr1_0, pattern0_0, pattern1_0)?;
@@ -3194,7 +3178,7 @@ pub fn constructor_x64_addsd<C: Context>(ctx: &mut C, arg0: Xmm, arg1: &XmmMem)
 pub fn constructor_x64_addps<C: Context>(ctx: &mut C, arg0: Xmm, arg1: &XmmMem) -> Option<Xmm> {
     let pattern0_0 = arg0;
     let pattern1_0 = arg1;
-    // Rule at src/isa/x64/inst.isle line 2016.
+    // Rule at src/isa/x64/inst.isle line 2017.
     let expr0_0: Type = F32;
     let expr1_0 = SseOpcode::Addps;
     let expr2_0 = constructor_xmm_rm_r(ctx, expr0_0, &expr1_0, pattern0_0, pattern1_0)?;
@@ -3205,7 +3189,7 @@ pub fn constructor_x64_addps<C: Context>(ctx: &mut C, arg0: Xmm, arg1: &XmmMem)
 pub fn constructor_x64_addpd<C: Context>(ctx: &mut C, arg0: Xmm, arg1: &XmmMem) -> Option<Xmm> {
     let pattern0_0 = arg0;
     let pattern1_0 = arg1;
-    // Rule at src/isa/x64/inst.isle line 2021.
+    // Rule at src/isa/x64/inst.isle line 2022.
     let expr0_0: Type = F32;
     let expr1_0 = SseOpcode::Addpd;
     let expr2_0 = constructor_xmm_rm_r(ctx, expr0_0, &expr1_0, pattern0_0, pattern1_0)?;
@@ -3216,7 +3200,7 @@ pub fn constructor_x64_addpd<C: Context>(ctx: &mut C, arg0: Xmm, arg1: &XmmMem)
 pub fn constructor_x64_subss<C: Context>(ctx: &mut C, arg0: Xmm, arg1: &XmmMem) -> Option<Xmm> {
     let pattern0_0 = arg0;
     let pattern1_0 = arg1;
-    // Rule at src/isa/x64/inst.isle line 2026.
+    // Rule at src/isa/x64/inst.isle line 2027.
     let expr0_0: Type = F32;
     let expr1_0 = SseOpcode::Subss;
     let expr2_0 = constructor_xmm_rm_r(ctx, expr0_0, &expr1_0, pattern0_0, pattern1_0)?;
@@ -3227,7 +3211,7 @@ pub fn constructor_x64_subss<C: Context>(ctx: &mut C, arg0: Xmm, arg1: &XmmMem)
 pub fn constructor_x64_subsd<C: Context>(ctx: &mut C, arg0: Xmm, arg1: &XmmMem) -> Option<Xmm> {
     let pattern0_0 = arg0;
     let pattern1_0 = arg1;
-    // Rule at src/isa/x64/inst.isle line 2031.
+    // Rule at src/isa/x64/inst.isle line 2032.
     let expr0_0: Type = F64;
     let expr1_0 = SseOpcode::Subsd;
     let expr2_0 = constructor_xmm_rm_r(ctx, expr0_0, &expr1_0, pattern0_0, pattern1_0)?;
@@ -3238,7 +3222,7 @@ pub fn constructor_x64_subsd<C: Context>(ctx: &mut C, arg0: Xmm, arg1: &XmmMem)
 pub fn constructor_x64_subps<C: Context>(ctx: &mut C, arg0: Xmm, arg1: &XmmMem) -> Option<Xmm> {
     let pattern0_0 = arg0;
     let pattern1_0 = arg1;
-    // Rule at src/isa/x64/inst.isle line 2036.
+    // Rule at src/isa/x64/inst.isle line 2037.
     let expr0_0: Type = F32;
     let expr1_0 = SseOpcode::Subps;
     let expr2_0 = constructor_xmm_rm_r(ctx, expr0_0, &expr1_0, pattern0_0, pattern1_0)?;
@@ -3249,7 +3233,7 @@ pub fn constructor_x64_subps<C: Context>(ctx: &mut C, arg0: Xmm, arg1: &XmmMem)
 pub fn constructor_x64_subpd<C: Context>(ctx: &mut C, arg0: Xmm, arg1: &XmmMem) -> Option<Xmm> {
     let pattern0_0 = arg0;
     let pattern1_0 = arg1;
-    // Rule at src/isa/x64/inst.isle line 2041.
+    // Rule at src/isa/x64/inst.isle line 2042.
     let expr0_0: Type = F32;
     let expr1_0 = SseOpcode::Subpd;
     let expr2_0 = constructor_xmm_rm_r(ctx, expr0_0, &expr1_0, pattern0_0, pattern1_0)?;
@@ -3260,7 +3244,7 @@ pub fn constructor_x64_subpd<C: Context>(ctx: &mut C, arg0: Xmm, arg1: &XmmMem)
 pub fn constructor_x64_mulss<C: Context>(ctx: &mut C, arg0: Xmm, arg1: &XmmMem) -> Option<Xmm> {
     let pattern0_0 = arg0;
     let pattern1_0 = arg1;
-    // Rule at src/isa/x64/inst.isle line 2046.
+    // Rule at src/isa/x64/inst.isle line 2047.
     let expr0_0: Type = F32;
     let expr1_0 = SseOpcode::Mulss;
     let expr2_0 = constructor_xmm_rm_r(ctx, expr0_0, &expr1_0, pattern0_0, pattern1_0)?;
@@ -3271,7 +3255,7 @@ pub fn constructor_x64_mulss<C: Context>(ctx: &mut C, arg0: Xmm, arg1: &XmmMem)
 pub fn constructor_x64_mulsd<C: Context>(ctx: &mut C, arg0: Xmm, arg1: &XmmMem) -> Option<Xmm> {
     let pattern0_0 = arg0;
     let pattern1_0 = arg1;
-    // Rule at src/isa/x64/inst.isle line 2051.
+    // Rule at src/isa/x64/inst.isle line 2052.
     let expr0_0: Type = F64;
     let expr1_0 = SseOpcode::Mulsd;
     let expr2_0 = constructor_xmm_rm_r(ctx, expr0_0, &expr1_0, pattern0_0, pattern1_0)?;
@@ -3282,7 +3266,7 @@ pub fn constructor_x64_mulsd<C: Context>(ctx: &mut C, arg0: Xmm, arg1: &XmmMem)
 pub fn constructor_x64_mulps<C: Context>(ctx: &mut C, arg0: Xmm, arg1: &XmmMem) -> Option<Xmm> {
     let pattern0_0 = arg0;
     let pattern1_0 = arg1;
-    // Rule at src/isa/x64/inst.isle line 2056.
+    // Rule at src/isa/x64/inst.isle line 2057.
     let expr0_0: Type = F32;
     let expr1_0 = SseOpcode::Mulps;
     let expr2_0 = constructor_xmm_rm_r(ctx, expr0_0, &expr1_0, pattern0_0, pattern1_0)?;
@@ -3293,7 +3277,7 @@ pub fn constructor_x64_mulps<C: Context>(ctx: &mut C, arg0: Xmm, arg1: &XmmMem)
 pub fn constructor_x64_mulpd<C: Context>(ctx: &mut C, arg0: Xmm, arg1: &XmmMem) -> Option<Xmm> {
     let pattern0_0 = arg0;
     let pattern1_0 = arg1;
-    // Rule at src/isa/x64/inst.isle line 2061.
+    // Rule at src/isa/x64/inst.isle line 2062.
     let expr0_0: Type = F32;
     let expr1_0 = SseOpcode::Mulpd;
     let expr2_0 = constructor_xmm_rm_r(ctx, expr0_0, &expr1_0, pattern0_0, pattern1_0)?;
@@ -3304,7 +3288,7 @@ pub fn constructor_x64_mulpd<C: Context>(ctx: &mut C, arg0: Xmm, arg1: &XmmMem)
 pub fn constructor_x64_divss<C: Context>(ctx: &mut C, arg0: Xmm, arg1: &XmmMem) -> Option<Xmm> {
     let pattern0_0 = arg0;
     let pattern1_0 = arg1;
-    // Rule at src/isa/x64/inst.isle line 2066.
+    // Rule at src/isa/x64/inst.isle line 2067.
     let expr0_0: Type = F32;
     let expr1_0 = SseOpcode::Divss;
     let expr2_0 = constructor_xmm_rm_r(ctx, expr0_0, &expr1_0, pattern0_0, pattern1_0)?;
@@ -3315,7 +3299,7 @@ pub fn constructor_x64_divss<C: Context>(ctx: &mut C, arg0: Xmm, arg1: &XmmMem)
 pub fn constructor_x64_divsd<C: Context>(ctx: &mut C, arg0: Xmm, arg1: &XmmMem) -> Option<Xmm> {
     let pattern0_0 = arg0;
     let pattern1_0 = arg1;
-    // Rule at src/isa/x64/inst.isle line 2071.
+    // Rule at src/isa/x64/inst.isle line 2072.
     let expr0_0: Type = F64;
     let expr1_0 = SseOpcode::Divsd;
     let expr2_0 = constructor_xmm_rm_r(ctx, expr0_0, &expr1_0, pattern0_0, pattern1_0)?;
@@ -3326,7 +3310,7 @@ pub fn constructor_x64_divsd<C: Context>(ctx: &mut C, arg0: Xmm, arg1: &XmmMem)
 pub fn constructor_x64_divps<C: Context>(ctx: &mut C, arg0: Xmm, arg1: &XmmMem) -> Option<Xmm> {
     let pattern0_0 = arg0;
     let pattern1_0 = arg1;
-    // Rule at src/isa/x64/inst.isle line 2076.
+    // Rule at src/isa/x64/inst.isle line 2077.
     let expr0_0: Type = F32;
     let expr1_0 = SseOpcode::Divps;
     let expr2_0 = constructor_xmm_rm_r(ctx, expr0_0, &expr1_0, pattern0_0, pattern1_0)?;
@@ -3337,7 +3321,7 @@ pub fn constructor_x64_divps<C: Context>(ctx: &mut C, arg0: Xmm, arg1: &XmmMem)
 pub fn constructor_x64_divpd<C: Context>(ctx: &mut C, arg0: Xmm, arg1: &XmmMem) -> Option<Xmm> {
     let pattern0_0 = arg0;
     let pattern1_0 = arg1;
-    // Rule at src/isa/x64/inst.isle line 2081.
+    // Rule at src/isa/x64/inst.isle line 2082.
     let expr0_0: Type = F32;
     let expr1_0 = SseOpcode::Divpd;
     let expr2_0 = constructor_xmm_rm_r(ctx, expr0_0, &expr1_0, pattern0_0, pattern1_0)?;
@@ -3348,17 +3332,17 @@ pub fn constructor_x64_divpd<C: Context>(ctx: &mut C, arg0: Xmm, arg1: &XmmMem)
 pub fn constructor_sse_blend_op<C: Context>(ctx: &mut C, arg0: Type) -> Option<SseOpcode> {
     let pattern0_0 = arg0;
     if pattern0_0 == F32X4 {
-        // Rule at src/isa/x64/inst.isle line 2085.
+        // Rule at src/isa/x64/inst.isle line 2086.
         let expr0_0 = SseOpcode::Blendvps;
         return Some(expr0_0);
     }
     if pattern0_0 == F64X2 {
-        // Rule at src/isa/x64/inst.isle line 2086.
+        // Rule at src/isa/x64/inst.isle line 2087.
         let expr0_0 = SseOpcode::Blendvpd;
         return Some(expr0_0);
     }
     if let Some((pattern1_0, pattern1_1)) = C::multi_lane(ctx, pattern0_0) {
-        // Rule at src/isa/x64/inst.isle line 2087.
+        // Rule at src/isa/x64/inst.isle line 2088.
         let expr0_0 = SseOpcode::Pblendvb;
         return Some(expr0_0);
     }
@@ -3369,17 +3353,17 @@ pub fn constructor_sse_blend_op<C: Context>(ctx: &mut C, arg0: Type) -> Option<
 pub fn constructor_sse_mov_op<C: Context>(ctx: &mut C, arg0: Type) -> Option<SseOpcode> {
     let pattern0_0 = arg0;
     if pattern0_0 == F32X4 {
-        // Rule at src/isa/x64/inst.isle line 2090.
+        // Rule at src/isa/x64/inst.isle line 2091.
         let expr0_0 = SseOpcode::Movaps;
         return Some(expr0_0);
     }
     if pattern0_0 == F64X2 {
-        // Rule at src/isa/x64/inst.isle line 2091.
+        // Rule at src/isa/x64/inst.isle line 2092.
        let expr0_0 = SseOpcode::Movapd;
         return Some(expr0_0);
     }
     if let Some((pattern1_0, pattern1_1)) = C::multi_lane(ctx, pattern0_0) {
-        // Rule at src/isa/x64/inst.isle line 2092.
+        // Rule at src/isa/x64/inst.isle line 2093.
         let expr0_0 = SseOpcode::Movdqa;
         return Some(expr0_0);
     }
@@ -3398,7 +3382,7 @@ pub fn constructor_x64_blend<C: Context>(
     let pattern1_0 = arg1;
     let pattern2_0 = arg2;
     let pattern3_0 = arg3;
-    // Rule at src/isa/x64/inst.isle line 2096.
+    // Rule at src/isa/x64/inst.isle line 2097.
     let expr0_0 = C::xmm0(ctx);
     let expr1_0 = constructor_sse_mov_op(ctx, pattern0_0)?;
     let expr2_0 = MInst::XmmUnaryRmR {
@@ -3422,7 +3406,7 @@ pub fn constructor_x64_blendvpd<C: Context>(
     let pattern0_0 = arg0;
     let pattern1_0 = arg1;
     let pattern2_0 = arg2;
-    // Rule at src/isa/x64/inst.isle line 2110.
+    // Rule at src/isa/x64/inst.isle line 2111.
     let expr0_0 = C::xmm0(ctx);
     let expr1_0 = SseOpcode::Movapd;
     let expr2_0 = C::xmm_to_xmm_mem(ctx, pattern2_0);
@@ -3446,7 +3430,7 @@ pub fn constructor_x64_movsd_regmove<C: Context>(
 ) -> Option<Xmm> {
     let pattern0_0 = arg0;
     let pattern1_0 = arg1;
-    // Rule at src/isa/x64/inst.isle line 2124.
+    // Rule at src/isa/x64/inst.isle line 2125.
     let expr0_0: Type = I8X16;
     let expr1_0 = SseOpcode::Movsd;
     let expr2_0 = constructor_xmm_rm_r(ctx, expr0_0, &expr1_0, pattern0_0, pattern1_0)?;
@@ -3457,7 +3441,7 @@ pub fn constructor_x64_movsd_regmove<C: Context>(
 pub fn constructor_x64_movlhps<C: Context>(ctx: &mut C, arg0: Xmm, arg1: &XmmMem) -> Option<Xmm> {
     let pattern0_0 = arg0;
     let pattern1_0 = arg1;
-    // Rule at src/isa/x64/inst.isle line 2129.
+    // Rule at src/isa/x64/inst.isle line 2130.
     let expr0_0: Type = I8X16;
     let expr1_0 = SseOpcode::Movlhps;
     let expr2_0 = constructor_xmm_rm_r(ctx, expr0_0, &expr1_0, pattern0_0, pattern1_0)?;
@@ -3475,21 +3459,21 @@ pub fn constructor_x64_pmaxs<C: Context>(
     if pattern0_0 == I8X16 {
         let pattern2_0 = arg1;
         let pattern3_0 = arg2;
-        // Rule at src/isa/x64/inst.isle line 2134.
+        // Rule at src/isa/x64/inst.isle line 2135.
         let expr0_0 = constructor_x64_pmaxsb(ctx, pattern2_0, pattern3_0)?;
         return Some(expr0_0);
     }
     if pattern0_0 == I16X8 {
         let pattern2_0 = arg1;
         let pattern3_0 = arg2;
-        // Rule at src/isa/x64/inst.isle line 2135.
+        // Rule at src/isa/x64/inst.isle line 2136.
         let expr0_0 = constructor_x64_pmaxsw(ctx, pattern2_0, pattern3_0)?;
         return Some(expr0_0);
     }
     if pattern0_0 == I32X4 {
         let pattern2_0 = arg1;
         let pattern3_0 = arg2;
-        // Rule at src/isa/x64/inst.isle line 2136.
+        // Rule at src/isa/x64/inst.isle line 2137.
         let expr0_0 = constructor_x64_pmaxsd(ctx, pattern2_0, pattern3_0)?;
         return Some(expr0_0);
     }
@@ -3500,7 +3484,7 @@ pub fn constructor_x64_pmaxs<C: Context>(
 pub fn constructor_x64_pmaxsb<C: Context>(ctx: &mut C, arg0: Xmm, arg1: &XmmMem) -> Option<Xmm> {
     let pattern0_0 = arg0;
     let pattern1_0 = arg1;
-    // Rule at src/isa/x64/inst.isle line 2139.
+    // Rule at src/isa/x64/inst.isle line 2140.
     let expr0_0: Type = I8X16;
     let expr1_0 = SseOpcode::Pmaxsb;
     let expr2_0 = constructor_xmm_rm_r(ctx, expr0_0, &expr1_0, pattern0_0, pattern1_0)?;
@@ -3511,7 +3495,7 @@ pub fn constructor_x64_pmaxsb<C: Context>(ctx: &mut C, arg0: Xmm, arg1: &XmmMem)
 pub fn constructor_x64_pmaxsw<C: Context>(ctx: &mut C, arg0: Xmm, arg1: &XmmMem) -> Option<Xmm> {
     let pattern0_0 = arg0;
     let pattern1_0 = arg1;
-    // Rule at src/isa/x64/inst.isle line 2141.
+    // Rule at src/isa/x64/inst.isle line 2142.
     let expr0_0: Type = I8X16;
     let expr1_0 = SseOpcode::Pmaxsw;
     let expr2_0 = constructor_xmm_rm_r(ctx, expr0_0, &expr1_0, pattern0_0, pattern1_0)?;
@@ -3522,7 +3506,7 @@ pub fn constructor_x64_pmaxsw<C: Context>(ctx: &mut C, arg0: Xmm, arg1: &XmmMem)
 pub fn constructor_x64_pmaxsd<C: Context>(ctx: &mut C, arg0: Xmm, arg1: &XmmMem) -> Option<Xmm> {
     let pattern0_0 = arg0;
     let pattern1_0 = arg1;
-    // Rule at src/isa/x64/inst.isle line 2143.
+    // Rule at src/isa/x64/inst.isle line 2144.
     let expr0_0: Type = I8X16;
     let expr1_0 = SseOpcode::Pmaxsd;
     let expr2_0 = constructor_xmm_rm_r(ctx, expr0_0, &expr1_0, pattern0_0, pattern1_0)?;
@@ -3540,21 +3524,21 @@ pub fn constructor_x64_pmins<C: Context>(
     if pattern0_0 == I8X16 {
         let pattern2_0 = arg1;
         let pattern3_0 = arg2;
-        // Rule at src/isa/x64/inst.isle line 2147.
+        // Rule at src/isa/x64/inst.isle line 2148.
         let expr0_0 = constructor_x64_pminsb(ctx, pattern2_0, pattern3_0)?;
         return Some(expr0_0);
     }
     if pattern0_0 == I16X8 {
         let pattern2_0 = arg1;
         let pattern3_0 = arg2;
-        // Rule at src/isa/x64/inst.isle line 2148.
+        // Rule at src/isa/x64/inst.isle line 2149.
         let expr0_0 = constructor_x64_pminsw(ctx, pattern2_0, pattern3_0)?;
         return Some(expr0_0);
     }
     if pattern0_0 == I32X4 {
         let pattern2_0 = arg1;
         let pattern3_0 = arg2;
-        // Rule at src/isa/x64/inst.isle line 2149.
+        // Rule at src/isa/x64/inst.isle line 2150.
         let expr0_0 = constructor_x64_pminsd(ctx, pattern2_0, pattern3_0)?;
         return Some(expr0_0);
     }
@@ -3565,7 +3549,7 @@ pub fn constructor_x64_pmins<C: Context>(
 pub fn constructor_x64_pminsb<C: Context>(ctx: &mut C, arg0: Xmm, arg1: &XmmMem) -> Option<Xmm> {
     let pattern0_0 = arg0;
     let pattern1_0 = arg1;
-    // Rule at src/isa/x64/inst.isle line 2152.
+    // Rule at src/isa/x64/inst.isle line 2153.
     let expr0_0: Type = I8X16;
     let expr1_0 = SseOpcode::Pminsb;
     let expr2_0 = constructor_xmm_rm_r(ctx, expr0_0, &expr1_0, pattern0_0, pattern1_0)?;
@@ -3576,7 +3560,7 @@ pub fn constructor_x64_pminsb<C: Context>(ctx: &mut C, arg0: Xmm, arg1: &XmmMem)
 pub fn constructor_x64_pminsw<C: Context>(ctx: &mut C, arg0: Xmm, arg1: &XmmMem) -> Option<Xmm> {
     let pattern0_0 = arg0;
     let pattern1_0 = arg1;
-    // Rule at src/isa/x64/inst.isle line 2154.
+    // Rule at src/isa/x64/inst.isle line 2155.
     let expr0_0: Type = I16X8;
     let expr1_0 = SseOpcode::Pminsw;
     let expr2_0 = constructor_xmm_rm_r(ctx, expr0_0, &expr1_0, pattern0_0, pattern1_0)?;
@@ -3587,7 +3571,7 @@ pub fn constructor_x64_pminsw<C: Context>(ctx: &mut C, arg0: Xmm, arg1: &XmmMem)
 pub fn constructor_x64_pminsd<C: Context>(ctx: &mut C, arg0: Xmm, arg1: &XmmMem) -> Option<Xmm> {
     let pattern0_0 = arg0;
     let pattern1_0 = arg1;
-    // Rule at src/isa/x64/inst.isle line 2156.
+    // Rule at src/isa/x64/inst.isle line 2157.
     let expr0_0: Type = I32X4;
     let expr1_0 = SseOpcode::Pminsd;
     let expr2_0 = constructor_xmm_rm_r(ctx, expr0_0, &expr1_0, pattern0_0, pattern1_0)?;
@@ -3605,21 +3589,21 @@ pub fn constructor_x64_pmaxu<C: Context>(
     if pattern0_0 == I8X16 {
         let pattern2_0 = arg1;
         let pattern3_0 = arg2;
-        // Rule at src/isa/x64/inst.isle line 2160.
+        // Rule at src/isa/x64/inst.isle line 2161.
         let expr0_0 = constructor_x64_pmaxub(ctx, pattern2_0, pattern3_0)?;
         return Some(expr0_0);
     }
     if pattern0_0 == I16X8 {
         let pattern2_0 = arg1;
         let pattern3_0 = arg2;
-        // Rule at src/isa/x64/inst.isle line 2161.
+        // Rule at src/isa/x64/inst.isle line 2162.
         let expr0_0 = constructor_x64_pmaxuw(ctx, pattern2_0, pattern3_0)?;
         return Some(expr0_0);
     }
     if pattern0_0 == I32X4 {
         let pattern2_0 = arg1;
         let pattern3_0 = arg2;
-        // Rule at src/isa/x64/inst.isle line 2162.
+        // Rule at src/isa/x64/inst.isle line 2163.
         let expr0_0 = constructor_x64_pmaxud(ctx, pattern2_0, pattern3_0)?;
         return Some(expr0_0);
     }
@@ -3630,7 +3614,7 @@ pub fn constructor_x64_pmaxu<C: Context>(
 pub fn constructor_x64_pmaxub<C: Context>(ctx: &mut C, arg0: Xmm, arg1: &XmmMem) -> Option<Xmm> {
     let pattern0_0 = arg0;
     let pattern1_0 = arg1;
-    // Rule at src/isa/x64/inst.isle line 2165.
+    // Rule at src/isa/x64/inst.isle line 2166.
     let expr0_0: Type = I8X16;
     let expr1_0 = SseOpcode::Pmaxub;
     let expr2_0 = constructor_xmm_rm_r(ctx, expr0_0, &expr1_0, pattern0_0, pattern1_0)?;
@@ -3641,7 +3625,7 @@ pub fn constructor_x64_pmaxub<C: Context>(ctx: &mut C, arg0: Xmm, arg1: &XmmMem)
 pub fn constructor_x64_pmaxuw<C: Context>(ctx: &mut C, arg0: Xmm, arg1: &XmmMem) -> Option<Xmm> {
     let pattern0_0 = arg0;
     let pattern1_0 = arg1;
-    // Rule at src/isa/x64/inst.isle line 2167.
+    // Rule at src/isa/x64/inst.isle line 2168.
     let expr0_0: Type = I8X16;
     let expr1_0 = SseOpcode::Pmaxuw;
     let expr2_0 = constructor_xmm_rm_r(ctx, expr0_0, &expr1_0, pattern0_0, pattern1_0)?;
@@ -3652,7 +3636,7 @@ pub fn constructor_x64_pmaxuw<C: Context>(ctx: &mut C, arg0: Xmm, arg1: &XmmMem)
 pub fn constructor_x64_pmaxud<C: Context>(ctx: &mut C, arg0: Xmm, arg1: &XmmMem) -> Option<Xmm> {
     let pattern0_0 = arg0;
     let pattern1_0 = arg1;
-    // Rule at src/isa/x64/inst.isle line 2169.
+    // Rule at src/isa/x64/inst.isle line 2170.
     let expr0_0: Type = I8X16;
     let expr1_0 = SseOpcode::Pmaxud;
     let expr2_0 = constructor_xmm_rm_r(ctx, expr0_0, &expr1_0, pattern0_0, pattern1_0)?;
@@ -3670,21 +3654,21 @@ pub fn constructor_x64_pminu<C: Context>(
     if pattern0_0 == I8X16 {
         let pattern2_0 = arg1;
         let pattern3_0 = arg2;
-        // Rule at src/isa/x64/inst.isle line 2173.
+        // Rule at src/isa/x64/inst.isle line 2174.
         let expr0_0 = constructor_x64_pminub(ctx, pattern2_0, pattern3_0)?;
         return Some(expr0_0);
     }
     if pattern0_0 == I16X8 {
         let pattern2_0 = arg1;
         let pattern3_0 = arg2;
-        // Rule at src/isa/x64/inst.isle line 2174.
+        // Rule at src/isa/x64/inst.isle line 2175.
         let expr0_0 = constructor_x64_pminuw(ctx, pattern2_0, pattern3_0)?;
         return Some(expr0_0);
     }
     if pattern0_0 == I32X4 {
         let pattern2_0 = arg1;
         let pattern3_0 = arg2;
-        // Rule at src/isa/x64/inst.isle line 2175.
+        // Rule at src/isa/x64/inst.isle line 2176.
         let expr0_0 = constructor_x64_pminud(ctx, pattern2_0, pattern3_0)?;
         return Some(expr0_0);
     }
@@ -3695,7 +3679,7 @@ pub fn constructor_x64_pminu<C: Context>(
 pub fn constructor_x64_pminub<C: Context>(ctx: &mut C, arg0: Xmm, arg1: &XmmMem) -> Option<Xmm> {
     let pattern0_0 = arg0;
     let pattern1_0 = arg1;
-    // Rule at src/isa/x64/inst.isle line 2178.
+    // Rule at src/isa/x64/inst.isle line 2179.
     let expr0_0: Type = I8X16;
     let expr1_0 = SseOpcode::Pminub;
     let expr2_0 = constructor_xmm_rm_r(ctx, expr0_0, &expr1_0, pattern0_0, pattern1_0)?;
@@ -3706,7 +3690,7 @@ pub fn constructor_x64_pminub<C: Context>(ctx: &mut C, arg0: Xmm, arg1: &XmmMem)
 pub fn constructor_x64_pminuw<C: Context>(ctx: &mut C, arg0: Xmm, arg1: &XmmMem) -> Option<Xmm> {
     let pattern0_0 = arg0;
     let pattern1_0 = arg1;
-    // Rule at src/isa/x64/inst.isle line 2180.
+    // Rule at src/isa/x64/inst.isle line 2181.
     let expr0_0: Type = I8X16;
     let expr1_0 = SseOpcode::Pminuw;
     let expr2_0 = constructor_xmm_rm_r(ctx, expr0_0, &expr1_0, pattern0_0, pattern1_0)?;
@@ -3717,7 +3701,7 @@ pub fn constructor_x64_pminuw<C: Context>(ctx: &mut C, arg0: Xmm, arg1: &XmmMem)
 pub fn constructor_x64_pminud<C: Context>(ctx: &mut C, arg0: Xmm, arg1: &XmmMem) -> Option<Xmm> {
     let pattern0_0 = arg0;
     let pattern1_0 = arg1;
-    // Rule at src/isa/x64/inst.isle line 2182.
+    // Rule at src/isa/x64/inst.isle line 2183.
     let expr0_0: Type = I8X16;
     let expr1_0 = SseOpcode::Pminud;
     let expr2_0 = constructor_xmm_rm_r(ctx, expr0_0, &expr1_0, pattern0_0, pattern1_0)?;
@@ -3728,7 +3712,7 @@ pub fn constructor_x64_pminud<C: Context>(ctx: &mut C, arg0: Xmm, arg1: &XmmMem)
 pub fn constructor_x64_punpcklbw<C: Context>(ctx: &mut C, arg0: Xmm, arg1: &XmmMem) -> Option<Xmm> {
     let pattern0_0 = arg0;
     let pattern1_0 = arg1;
-    // Rule at src/isa/x64/inst.isle line 2186.
+    // Rule at src/isa/x64/inst.isle line 2187.
     let expr0_0: Type = I8X16;
     let expr1_0 = SseOpcode::Punpcklbw;
     let expr2_0 = constructor_xmm_rm_r(ctx, expr0_0, &expr1_0, pattern0_0, pattern1_0)?;
@@ -3739,7 +3723,7 @@ pub fn constructor_x64_punpcklbw<C: Context>(ctx: &mut C, arg0: Xmm, arg1: &XmmM
 pub fn constructor_x64_punpckhbw<C: Context>(ctx: &mut C, arg0: Xmm, arg1: &XmmMem) -> Option<Xmm> {
     let pattern0_0 = arg0;
     let pattern1_0 = arg1;
-    // Rule at src/isa/x64/inst.isle line 2191.
+    // Rule at src/isa/x64/inst.isle line 2192.
     let expr0_0: Type = I8X16;
     let expr1_0 = SseOpcode::Punpckhbw;
     let expr2_0 = constructor_xmm_rm_r(ctx, expr0_0, &expr1_0, pattern0_0, pattern1_0)?;
@@ -3750,7 +3734,7 @@ pub fn constructor_x64_punpckhbw<C: Context>(ctx: &mut C, arg0: Xmm, arg1: &XmmM
 pub fn constructor_x64_packsswb<C: Context>(ctx: &mut C, arg0: Xmm, arg1: &XmmMem) -> Option<Xmm> {
     let pattern0_0 = arg0;
     let pattern1_0 = arg1;
-    // Rule at src/isa/x64/inst.isle line 2196.
+    // Rule at src/isa/x64/inst.isle line 2197.
     let expr0_0: Type = I8X16;
     let expr1_0 = SseOpcode::Packsswb;
     let expr2_0 = constructor_xmm_rm_r(ctx, expr0_0, &expr1_0, pattern0_0, pattern1_0)?;
@@ -3771,7 +3755,7 @@ pub fn constructor_xmm_rm_r_imm<C: Context>(
     let pattern2_0 = arg2;
     let pattern3_0 = arg3;
     let pattern4_0 = arg4;
-    // Rule at src/isa/x64/inst.isle line 2201.
+    // Rule at src/isa/x64/inst.isle line 2202.
     let expr0_0 = C::temp_writable_xmm(ctx);
     let expr1_0 = C::writable_xmm_to_reg(ctx, expr0_0);
     let expr2_0 = MInst::XmmRmRImm {
@@ -3799,7 +3783,7 @@ pub fn constructor_x64_palignr<C: Context>(
     let pattern1_0 = arg1;
     let pattern2_0 = arg2;
     let pattern3_0 = arg3;
-    // Rule at src/isa/x64/inst.isle line 2213.
+    // Rule at src/isa/x64/inst.isle line 2214.
     let expr0_0 = SseOpcode::Palignr;
     let expr1_0 = C::xmm_to_reg(ctx, pattern0_0);
     let expr2_0 = C::xmm_mem_to_reg_mem(ctx, pattern1_0);
@@ -3821,7 +3805,7 @@ pub fn constructor_x64_cmpp<C: Context>(
     let pattern2_0 = arg1;
     let pattern3_0 = arg2;
     let pattern4_0 = arg3;
-        // Rule at src/isa/x64/inst.isle line 2222.
+        // Rule at src/isa/x64/inst.isle line 2223.
     let expr0_0 = constructor_x64_cmpps(ctx, pattern2_0, pattern3_0, pattern4_0)?;
     return Some(expr0_0);
     }
@@ -3829,7 +3813,7 @@ pub fn constructor_x64_cmpp<C: Context>(
     let pattern2_0 = arg1;
     let pattern3_0 = arg2;
     let pattern4_0 = arg3;
-        // Rule at src/isa/x64/inst.isle line 2223.
+        // Rule at src/isa/x64/inst.isle line 2224.
     let expr0_0 = constructor_x64_cmppd(ctx, pattern2_0, pattern3_0, pattern4_0)?;
     return Some(expr0_0);
     }
@@ -3846,7 +3830,7 @@ pub fn constructor_x64_cmpps<C: Context>(
     let pattern0_0 = arg0;
     let pattern1_0 = arg1;
     let pattern2_0 = arg2;
-    // Rule at src/isa/x64/inst.isle line 2226.
+    // Rule at src/isa/x64/inst.isle line 2227.
     let expr0_0 = SseOpcode::Cmpps;
     let expr1_0 = C::xmm_to_reg(ctx, pattern0_0);
     let expr2_0 = C::xmm_mem_to_reg_mem(ctx, pattern1_0);
@@ -3866,7 +3850,7 @@ pub fn constructor_x64_cmppd<C: Context>(
     let pattern0_0 = arg0;
     let pattern1_0 = arg1;
     let pattern2_0 = arg2;
-    // Rule at src/isa/x64/inst.isle line 2237.
+    // Rule at src/isa/x64/inst.isle line 2238.
     let expr0_0 = SseOpcode::Cmppd;
     let expr1_0 = C::xmm_to_reg(ctx, pattern0_0);
     let expr2_0 = C::xmm_mem_to_reg_mem(ctx, pattern1_0);
@@ -3886,7 +3870,7 @@ pub fn constructor_x64_pinsrb<C: Context>(
     let pattern0_0 = arg0;
     let pattern1_0 = arg1;
     let pattern2_0 = arg2;
-    // Rule at src/isa/x64/inst.isle line 2246.
+    // Rule at src/isa/x64/inst.isle line 2247.
     let expr0_0 = SseOpcode::Pinsrb;
     let expr1_0 = C::xmm_to_reg(ctx, pattern0_0);
     let expr2_0 = C::gpr_mem_to_reg_mem(ctx, pattern1_0);
@@ -3905,7 +3889,7 @@ pub fn constructor_x64_pinsrw<C: Context>(
     let pattern0_0 = arg0;
     let pattern1_0 = arg1;
     let pattern2_0 = arg2;
-    // Rule at src/isa/x64/inst.isle line 2255.
+    // Rule at src/isa/x64/inst.isle line 2256.
     let expr0_0 = SseOpcode::Pinsrw;
     let expr1_0 = C::xmm_to_reg(ctx, pattern0_0);
     let expr2_0 = C::gpr_mem_to_reg_mem(ctx, pattern1_0);
@@ -3926,7 +3910,7 @@ pub fn constructor_x64_pinsrd<C: Context>(
     let pattern1_0 = arg1;
     let pattern2_0 = arg2;
     let pattern3_0 = arg3;
-    // Rule at src/isa/x64/inst.isle line 2264.
+    // Rule at src/isa/x64/inst.isle line 2265.
     let expr0_0 = SseOpcode::Pinsrd;
     let expr1_0 = C::xmm_to_reg(ctx, pattern0_0);
     let expr2_0 = C::gpr_mem_to_reg_mem(ctx, pattern1_0);
@@ -3939,7 +3923,7 @@ pub fn constructor_x64_pinsrd<C: Context>(
 pub fn constructor_x64_pmaddwd<C: Context>(ctx: &mut C, arg0: Xmm, arg1: &XmmMem) -> Option<Xmm> {
     let pattern0_0 = arg0;
     let pattern1_0 = arg1;
-    // Rule at src/isa/x64/inst.isle line 2273.
+    // Rule at src/isa/x64/inst.isle line 2274.
     let expr0_0 = C::temp_writable_xmm(ctx);
     let expr1_0 = SseOpcode::Pmaddwd;
     let expr2_0 = MInst::XmmRmR {
@@ -3963,7 +3947,7 @@ pub fn constructor_x64_insertps<C: Context>(
     let pattern0_0 = arg0;
     let pattern1_0 = arg1;
     let pattern2_0 = arg2;
-    // Rule at src/isa/x64/inst.isle line 2283.
+    // Rule at src/isa/x64/inst.isle line 2284.
     let expr0_0 = SseOpcode::Insertps;
     let expr1_0 = C::xmm_to_reg(ctx, pattern0_0);
     let expr2_0 = C::xmm_mem_to_reg_mem(ctx, pattern1_0);
@@ -3982,7 +3966,7 @@ pub fn constructor_x64_pshufd<C: Context>(
     let pattern0_0 = arg0;
     let pattern1_0 = arg1;
     let pattern2_0 = arg2;
-    // Rule at src/isa/x64/inst.isle line 2292.
+    // Rule at src/isa/x64/inst.isle line 2293.
     let expr0_0 = C::temp_writable_xmm(ctx);
     let expr1_0 = SseOpcode::Pshufd;
     let expr2_0 = constructor_writable_xmm_to_r_reg(ctx, expr0_0)?;
@@ -4005,7 +3989,7 @@ pub fn constructor_x64_pshufd<C: Context>(
 pub fn constructor_x64_pshufb<C: Context>(ctx: &mut C, arg0: Xmm, arg1: &XmmMem) -> Option<Xmm> {
     let pattern0_0 = arg0;
     let pattern1_0 = arg1;
-    // Rule at src/isa/x64/inst.isle line 2304.
+    // Rule at src/isa/x64/inst.isle line 2305.
     let expr0_0 = C::temp_writable_xmm(ctx);
     let expr1_0 = SseOpcode::Pshufb;
     let expr2_0 = MInst::XmmRmR {
@@ -4027,7 +4011,7 @@ pub fn constructor_xmm_unary_rm_r<C: Context>(
 ) -> Option<Xmm> {
     let pattern0_0 = arg0;
     let pattern1_0 = arg1;
-    // Rule at src/isa/x64/inst.isle line 2314.
+    // Rule at src/isa/x64/inst.isle line 2315.
     let expr0_0 = C::temp_writable_xmm(ctx);
     let expr1_0 = MInst::XmmUnaryRmR {
         op: pattern0_0.clone(),
@@ -4042,7 +4026,7 @@ pub fn constructor_xmm_unary_rm_r<C: Context>(
 // Generated as internal constructor for term x64_pabsb.
 pub fn constructor_x64_pabsb<C: Context>(ctx: &mut C, arg0: &XmmMem) -> Option<Xmm> {
     let pattern0_0 = arg0;
-    // Rule at src/isa/x64/inst.isle line 2321.
+    // Rule at src/isa/x64/inst.isle line 2322.
     let expr0_0 = SseOpcode::Pabsb;
     let expr1_0 = constructor_xmm_unary_rm_r(ctx, &expr0_0, pattern0_0)?;
     return Some(expr1_0);
@@ -4051,7 +4035,7 @@ pub fn constructor_x64_pabsb<C: Context>(ctx: &mut C, arg0: &XmmMem) -> Option<
 // Generated as internal constructor for term x64_pabsw.
 pub fn constructor_x64_pabsw<C: Context>(ctx: &mut C, arg0: &XmmMem) -> Option<Xmm> {
     let pattern0_0 = arg0;
-    // Rule at src/isa/x64/inst.isle line 2326.
+    // Rule at src/isa/x64/inst.isle line 2327.
     let expr0_0 = SseOpcode::Pabsw;
     let expr1_0 = constructor_xmm_unary_rm_r(ctx, &expr0_0, pattern0_0)?;
     return Some(expr1_0);
@@ -4060,7 +4044,7 @@ pub fn constructor_x64_pabsw<C: Context>(ctx: &mut C, arg0: &XmmMem) -> Option<
 // Generated as internal constructor for term x64_pabsd.
 pub fn constructor_x64_pabsd<C: Context>(ctx: &mut C, arg0: &XmmMem) -> Option<Xmm> {
     let pattern0_0 = arg0;
-    // Rule at src/isa/x64/inst.isle line 2331.
+    // Rule at src/isa/x64/inst.isle line 2332.
     let expr0_0 = SseOpcode::Pabsd;
     let expr1_0 = constructor_xmm_unary_rm_r(ctx, &expr0_0, pattern0_0)?;
     return Some(expr1_0);
@@ -4074,7 +4058,7 @@ pub fn constructor_xmm_unary_rm_r_evex<C: Context>(
 ) -> Option<Xmm> {
     let pattern0_0 = arg0;
     let pattern1_0 = arg1;
-    // Rule at src/isa/x64/inst.isle line 2336.
+    // Rule at src/isa/x64/inst.isle line 2337.
     let expr0_0 = C::temp_writable_xmm(ctx);
     let expr1_0 = MInst::XmmUnaryRmREvex {
         op: pattern0_0.clone(),
@@ -4089,7 +4073,7 @@ pub fn constructor_xmm_unary_rm_r_evex<C: Context>(
 // Generated as internal constructor for term x64_vpabsq.
 pub fn constructor_x64_vpabsq<C: Context>(ctx: &mut C, arg0: &XmmMem) -> Option<Xmm> {
     let pattern0_0 = arg0;
-    // Rule at src/isa/x64/inst.isle line 2343.
+    // Rule at src/isa/x64/inst.isle line 2344.
     let expr0_0 = Avx512Opcode::Vpabsq;
     let expr1_0 = constructor_xmm_unary_rm_r_evex(ctx, &expr0_0, pattern0_0)?;
     return Some(expr1_0);
@@ -4098,7 +4082,7 @@ pub fn constructor_x64_vpabsq<C: Context>(ctx: &mut C, arg0: &XmmMem) -> Option<
 // Generated as internal constructor for term x64_vpopcntb.
 pub fn constructor_x64_vpopcntb<C: Context>(ctx: &mut C, arg0: &XmmMem) -> Option<Xmm> {
     let pattern0_0 = arg0;
-    // Rule at src/isa/x64/inst.isle line 2348.
+    // Rule at src/isa/x64/inst.isle line 2349.
     let expr0_0 = Avx512Opcode::Vpopcntb;
     let expr1_0 = constructor_xmm_unary_rm_r_evex(ctx, &expr0_0, pattern0_0)?;
     return Some(expr1_0);
@@ -4114,7 +4098,7 @@ pub fn constructor_xmm_rm_r_evex<C: Context>(
     let pattern0_0 = arg0;
     let pattern1_0 = arg1;
     let pattern2_0 = arg2;
-    // Rule at src/isa/x64/inst.isle line 2353.
+    // Rule at src/isa/x64/inst.isle line 2354.
     let expr0_0 = C::temp_writable_xmm(ctx);
     let expr1_0 = MInst::XmmRmREvex {
         op: pattern0_0.clone(),
@@ -4131,7 +4115,7 @@ pub fn constructor_xmm_rm_r_evex<C: Context>(
 pub fn constructor_x64_vpmullq<C: Context>(ctx: &mut C, arg0: &XmmMem, arg1: Xmm) -> Option<Xmm> {
     let pattern0_0 = arg0;
     let pattern1_0 = arg1;
-    // Rule at src/isa/x64/inst.isle line 2365.
+    // Rule at src/isa/x64/inst.isle line 2366.
     let expr0_0 = Avx512Opcode::Vpmullq;
     let expr1_0 = constructor_xmm_rm_r_evex(ctx, &expr0_0, pattern0_0, pattern1_0)?;
     return Some(expr1_0);
@@ -4149,7 +4133,7 @@ pub fn constructor_mul_hi<C: Context>(
     let pattern1_0 = arg1;
     let pattern2_0 = arg2;
     let pattern3_0 = arg3;
-    // Rule at src/isa/x64/inst.isle line 2374.
+    // Rule at src/isa/x64/inst.isle line 2375.
     let expr0_0 = C::temp_writable_gpr(ctx);
     let expr1_0 = C::temp_writable_gpr(ctx);
     let expr2_0 = C::operand_size_of_type_32_64(ctx, pattern0_0);
@@ -4178,7 +4162,7 @@ pub fn constructor_mulhi_u<C: Context>(
     let pattern0_0 = arg0;
     let pattern1_0 = arg1;
     let pattern2_0 = arg2;
-    // Rule at src/isa/x64/inst.isle line 2389.
+    // Rule at src/isa/x64/inst.isle line 2390.
     let expr0_0: bool = false;
     let expr1_0 = constructor_mul_hi(ctx, pattern0_0, expr0_0, pattern1_0, pattern2_0)?;
     return Some(expr1_0);
@@ -4194,7 +4178,7 @@ pub fn constructor_xmm_rmi_xmm<C: Context>(
     let pattern0_0 = arg0;
     let pattern1_0 = arg1;
     let pattern2_0 = arg2;
-    // Rule at src/isa/x64/inst.isle line 2394.
+    // Rule at src/isa/x64/inst.isle line 2395.
     let expr0_0 = C::temp_writable_xmm(ctx);
     let expr1_0 = MInst::XmmRmiReg {
         opcode: pattern0_0.clone(),
@@ -4211,7 +4195,7 @@ pub fn constructor_xmm_rmi_xmm<C: Context>(
 pub fn constructor_x64_psllw<C: Context>(ctx: &mut C, arg0: Xmm, arg1: &XmmMemImm) -> Option<Xmm> {
     let pattern0_0 = arg0;
     let pattern1_0 = arg1;
-    // Rule at src/isa/x64/inst.isle line 2404.
+    // Rule at src/isa/x64/inst.isle line 2405.
     let expr0_0 = SseOpcode::Psllw;
     let expr1_0 = constructor_xmm_rmi_xmm(ctx, &expr0_0, pattern0_0, pattern1_0)?;
     return Some(expr1_0);
@@ -4221,7 +4205,7 @@ pub fn constructor_x64_psllw<C: Context>(ctx: &mut C, arg0: Xmm, arg1: &XmmMemIm
 pub fn constructor_x64_pslld<C: Context>(ctx: &mut C, arg0: Xmm, arg1: &XmmMemImm) -> Option<Xmm> {
     let pattern0_0 = arg0;
     let pattern1_0 = arg1;
-    // Rule at src/isa/x64/inst.isle line 2409.
+    // Rule at src/isa/x64/inst.isle line 2410.
     let expr0_0 = SseOpcode::Pslld;
     let expr1_0 = constructor_xmm_rmi_xmm(ctx, &expr0_0, pattern0_0, pattern1_0)?;
     return Some(expr1_0);
@@ -4231,7 +4215,7 @@ pub fn constructor_x64_pslld<C: Context>(ctx: &mut C, arg0: Xmm, arg1: &XmmMemIm
 pub fn constructor_x64_psllq<C: Context>(ctx: &mut C, arg0: Xmm, arg1: &XmmMemImm) -> Option<Xmm> {
     let pattern0_0 = arg0;
     let pattern1_0 = arg1;
-    // Rule at src/isa/x64/inst.isle line 2414.
+    // Rule at src/isa/x64/inst.isle line 2415.
     let expr0_0 = SseOpcode::Psllq;
     let expr1_0 = constructor_xmm_rmi_xmm(ctx, &expr0_0, pattern0_0, pattern1_0)?;
     return Some(expr1_0);
@@ -4241,7 +4225,7 @@ pub fn constructor_x64_psllq<C: Context>(ctx: &mut C, arg0: Xmm, arg1: &XmmMemIm
 pub fn constructor_x64_psrlw<C: Context>(ctx: &mut C, arg0: Xmm, arg1: &XmmMemImm) -> Option<Xmm> {
     let pattern0_0 = arg0;
     let pattern1_0 = arg1;
-    // Rule at src/isa/x64/inst.isle line 2419.
+    // Rule at src/isa/x64/inst.isle line 2420.
     let expr0_0 = SseOpcode::Psrlw;
     let expr1_0 = constructor_xmm_rmi_xmm(ctx, &expr0_0, pattern0_0, pattern1_0)?;
     return Some(expr1_0);
@@ -4251,7 +4235,7 @@ pub fn constructor_x64_psrlw<C: Context>(ctx: &mut C, arg0: Xmm, arg1: &XmmMemIm
 pub fn constructor_x64_psrld<C: Context>(ctx: &mut C, arg0: Xmm, arg1: &XmmMemImm) -> Option<Xmm> {
     let pattern0_0 = arg0;
     let pattern1_0 = arg1;
-    // Rule at src/isa/x64/inst.isle line 2424.
+    // Rule at src/isa/x64/inst.isle line 2425.
     let expr0_0 = SseOpcode::Psrld;
     let expr1_0 = constructor_xmm_rmi_xmm(ctx, &expr0_0, pattern0_0, pattern1_0)?;
     return Some(expr1_0);
@@ -4261,7 +4245,7 @@ pub fn constructor_x64_psrld<C: Context>(ctx: &mut C, arg0: Xmm, arg1: &XmmMemIm
 pub fn constructor_x64_psrlq<C: Context>(ctx: &mut C, arg0: Xmm, arg1: &XmmMemImm) -> Option<Xmm> {
     let pattern0_0 = arg0;
     let pattern1_0 = arg1;
-    // Rule at src/isa/x64/inst.isle line 2429.
+    // Rule at src/isa/x64/inst.isle line 2430.
     let expr0_0 = SseOpcode::Psrlq;
     let expr1_0 = constructor_xmm_rmi_xmm(ctx, &expr0_0, pattern0_0, pattern1_0)?;
     return Some(expr1_0);
@@ -4271,7 +4255,7 @@ pub fn constructor_x64_psrlq<C: Context>(ctx: &mut C, arg0: Xmm, arg1: &XmmMemIm
 pub fn constructor_x64_psraw<C: Context>(ctx: &mut C, arg0: Xmm, arg1: &XmmMemImm) -> Option<Xmm> {
     let pattern0_0 = arg0;
     let pattern1_0 = arg1;
-    // Rule at src/isa/x64/inst.isle line 2434.
+    // Rule at src/isa/x64/inst.isle line 2435.
     let expr0_0 = SseOpcode::Psraw;
     let expr1_0 = constructor_xmm_rmi_xmm(ctx, &expr0_0, pattern0_0, pattern1_0)?;
     return Some(expr1_0);
@@ -4281,7 +4265,7 @@ pub fn constructor_x64_psraw<C: Context>(ctx: &mut C, arg0: Xmm, arg1: &XmmMemIm
 pub fn constructor_x64_psrad<C: Context>(ctx: &mut C, arg0: Xmm, arg1: &XmmMemImm) -> Option<Xmm> {
     let pattern0_0 = arg0;
     let pattern1_0 = arg1;
-    // Rule at src/isa/x64/inst.isle line 2439.
+    // Rule at src/isa/x64/inst.isle line 2440.
     let expr0_0 = SseOpcode::Psrad;
     let expr1_0 = constructor_xmm_rmi_xmm(ctx, &expr0_0, pattern0_0, pattern1_0)?;
     return Some(expr1_0);
@@ -4297,7 +4281,7 @@ pub fn constructor_x64_pextrd<C: Context>(
     let pattern0_0 = arg0;
     let pattern1_0 = arg1;
     let pattern2_0 = arg2;
-    // Rule at src/isa/x64/inst.isle line 2444.
+    // Rule at src/isa/x64/inst.isle line 2445.
     let expr0_0 = C::temp_writable_gpr(ctx);
     let expr1_0 = SseOpcode::Pextrd;
     let expr2_0 = constructor_writable_gpr_to_r_reg(ctx, expr0_0)?;
@@ -4330,7 +4314,7 @@ pub fn constructor_gpr_to_xmm<C: Context>(
     let pattern0_0 = arg0;
     let pattern1_0 = arg1;
     let pattern2_0 = arg2;
-    // Rule at src/isa/x64/inst.isle line 2456.
+    // Rule at src/isa/x64/inst.isle line 2457.
     let expr0_0 = C::temp_writable_xmm(ctx);
     let expr1_0 = MInst::GprToXmm {
         op: pattern0_0.clone(),
@@ -4347,7 +4331,7 @@ pub fn constructor_gpr_to_xmm<C: Context>(
 pub fn constructor_x64_not<C: Context>(ctx: &mut C, arg0: Type, arg1: Gpr) -> Option<Gpr> {
     let pattern0_0 = arg0;
     let pattern1_0 = arg1;
-    // Rule at src/isa/x64/inst.isle line 2463.
+    // Rule at src/isa/x64/inst.isle line 2464.
     let expr0_0 = C::temp_writable_gpr(ctx);
     let expr1_0 = C::operand_size_of_type_32_64(ctx, pattern0_0);
     let expr2_0 = MInst::Not {
@@ -4364,7 +4348,7 @@ pub fn constructor_x64_not<C: Context>(ctx: &mut C, arg0: Type, arg1: Gpr) -> Op
 pub fn constructor_x64_neg<C: Context>(ctx: &mut C, arg0: Type, arg1: Gpr) -> Option<Gpr> {
     let pattern0_0 = arg0;
     let pattern1_0 = arg1;
-    // Rule at src/isa/x64/inst.isle line 2471.
+    // Rule at src/isa/x64/inst.isle line 2472.
     let expr0_0 = C::temp_writable_gpr(ctx);
     let expr1_0 = C::operand_size_of_type_32_64(ctx, pattern0_0);
     let expr2_0 = MInst::Neg {
@@ -4380,7 +4364,7 @@ pub fn constructor_x64_neg<C: Context>(ctx: &mut C, arg0: Type, arg1: Gpr) -> Op
 // Generated as internal constructor for term x64_lea.
 pub fn constructor_x64_lea<C: Context>(ctx: &mut C, arg0: &SyntheticAmode) -> Option<Gpr> {
     let pattern0_0 = arg0;
-    // Rule at src/isa/x64/inst.isle line 2478.
+    // Rule at src/isa/x64/inst.isle line 2479.
     let expr0_0 = C::temp_writable_gpr(ctx);
     let expr1_0 = MInst::LoadEffectiveAddress {
         addr: pattern0_0.clone(),
@@ -4394,7 +4378,7 @@ pub fn constructor_x64_lea<C: Context>(ctx: &mut C, arg0: &SyntheticAmode) -> Op
 // Generated as internal constructor for term x64_ud2.
 pub fn constructor_x64_ud2<C: Context>(ctx: &mut C, arg0: &TrapCode) -> Option<SideEffectNoResult> {
     let pattern0_0 = arg0;
-    // Rule at src/isa/x64/inst.isle line 2485.
+    // Rule at src/isa/x64/inst.isle line 2486.
     let expr0_0 = MInst::Ud2 {
         trap_code: pattern0_0.clone(),
     };
@@ -4404,7 +4388,7 @@ pub fn constructor_x64_ud2<C: Context>(ctx: &mut C, arg0: &TrapCode) -> Option<
 
 // Generated as internal constructor for term x64_hlt.
 pub fn constructor_x64_hlt<C: Context>(ctx: &mut C) -> Option<SideEffectNoResult> {
-    // Rule at src/isa/x64/inst.isle line 2490.
+    // Rule at src/isa/x64/inst.isle line 2491.
     let expr0_0 = MInst::Hlt;
     let expr1_0 = SideEffectNoResult::Inst { inst: expr0_0 };
     return Some(expr1_0);
@@ -4414,7 +4398,7 @@ pub fn constructor_x64_hlt<C: Context>(ctx: &mut C) -> Option<SideEffectNoResu
 pub fn constructor_x64_lzcnt<C: Context>(ctx: &mut C, arg0: Type, arg1: Gpr) -> Option<Gpr> {
     let pattern0_0 = arg0;
     let pattern1_0 = arg1;
-    // Rule at src/isa/x64/inst.isle line 2495.
+    // Rule at src/isa/x64/inst.isle line 2496.
     let expr0_0 = C::temp_writable_gpr(ctx);
     let expr1_0 = C::operand_size_of_type_32_64(ctx, pattern0_0);
     let expr2_0 = UnaryRmROpcode::Lzcnt;
@@ -4434,7 +4418,7 @@ pub fn constructor_x64_lzcnt<C: Context>(ctx: &mut C, arg0: Type, arg1: Gpr) ->
 pub fn constructor_x64_tzcnt<C: Context>(ctx: &mut C, arg0: Type, arg1: Gpr) -> Option<Gpr> {
     let pattern0_0 = arg0;
     let pattern1_0 = arg1;
-    // Rule at src/isa/x64/inst.isle line 2503.
+    // Rule at src/isa/x64/inst.isle line 2504.
     let expr0_0 = C::temp_writable_gpr(ctx);
     let expr1_0 = C::operand_size_of_type_32_64(ctx, pattern0_0);
     let expr2_0 = UnaryRmROpcode::Tzcnt;
@@ -4458,7 +4442,7 @@ pub fn constructor_x64_bsr<C: Context>(
 ) -> Option<ProducesFlags> {
     let pattern0_0 = arg0;
     let pattern1_0 = arg1;
-    // Rule at src/isa/x64/inst.isle line 2511.
+    // Rule at src/isa/x64/inst.isle line 2512.
     let expr0_0 = C::temp_writable_gpr(ctx);
     let expr1_0 = C::operand_size_of_type_32_64(ctx, pattern0_0);
     let expr2_0 = UnaryRmROpcode::Bsr;
@@ -4487,7 +4471,7 @@ pub fn constructor_bsr_or_else<C: Context>(
     let pattern0_0 = arg0;
     let pattern1_0 = arg1;
     let pattern2_0 = arg2;
-    // Rule at src/isa/x64/inst.isle line 2520.
+    // Rule at src/isa/x64/inst.isle line 2521.
     let expr0_0 = constructor_x64_bsr(ctx, pattern0_0, pattern1_0)?;
     let expr1_0 = constructor_produces_flags_get_reg(ctx, &expr0_0)?;
     let expr2_0 = C::gpr_new(ctx, expr1_0);
@@ -4508,7 +4492,7 @@ pub fn constructor_x64_bsf<C: Context>(
 ) -> Option<ProducesFlags> {
     let pattern0_0 = arg0;
     let pattern1_0 = arg1;
-    // Rule at src/isa/x64/inst.isle line 2531.
+    // Rule at src/isa/x64/inst.isle line 2532.
     let expr0_0 = C::temp_writable_gpr(ctx);
     let expr1_0 = C::operand_size_of_type_32_64(ctx, pattern0_0);
     let expr2_0 = UnaryRmROpcode::Bsf;
@@ -4537,7 +4521,7 @@ pub fn constructor_bsf_or_else<C: Context>(
     let pattern0_0 = arg0;
     let pattern1_0 = arg1;
     let pattern2_0 = arg2;
-    // Rule at src/isa/x64/inst.isle line 2540.
+    // Rule at src/isa/x64/inst.isle line 2541.
     let expr0_0 = constructor_x64_bsf(ctx, pattern0_0, pattern1_0)?;
     let expr1_0 = constructor_produces_flags_get_reg(ctx, &expr0_0)?;
     let expr2_0 = C::gpr_new(ctx, expr1_0);
@@ -4554,7 +4538,7 @@ pub fn constructor_bsf_or_else<C: Context>(
 pub fn constructor_x64_popcnt<C: Context>(ctx: &mut C, arg0: Type, arg1: Gpr) -> Option<Gpr> {
     let pattern0_0 = arg0;
     let pattern1_0 = arg1;
-    // Rule at src/isa/x64/inst.isle line 2551.
+    // Rule at src/isa/x64/inst.isle line 2552.
     let expr0_0 = C::temp_writable_gpr(ctx);
     let expr1_0 = C::operand_size_of_type_32_64(ctx, pattern0_0);
     let expr2_0 = UnaryRmROpcode::Popcnt;
@@ -4582,7 +4566,7 @@ pub fn constructor_xmm_min_max_seq<C: Context>(
     let pattern1_0 = arg1;
     let pattern2_0 = arg2;
     let pattern3_0 = arg3;
-    // Rule at src/isa/x64/inst.isle line 2559.
+    // Rule at src/isa/x64/inst.isle line 2560.
     let expr0_0 = C::temp_writable_xmm(ctx);
     let expr1_0 = C::operand_size_of_type_32_64(ctx, pattern0_0);
     let expr2_0 = MInst::XmmMinMaxSeq {
@@ -4601,7 +4585,7 @@ pub fn constructor_xmm_min_max_seq<C: Context>(
 pub fn constructor_x64_minss<C: Context>(ctx: &mut C, arg0: Xmm, arg1: Xmm) -> Option<Xmm> {
     let pattern0_0 = arg0;
     let pattern1_0 = arg1;
-    // Rule at src/isa/x64/inst.isle line 2567.
+    // Rule at src/isa/x64/inst.isle line 2568.
     let expr0_0 = C::temp_writable_xmm(ctx);
     let expr1_0 = SseOpcode::Minss;
     let expr2_0 = C::xmm_to_xmm_mem(ctx, pattern1_0);
@@ -4620,7 +4604,7 @@ pub fn constructor_x64_minss<C: Context>(ctx: &mut C, arg0: Xmm, arg1: Xmm) -> O
 pub fn constructor_x64_minsd<C: Context>(ctx: &mut C, arg0: Xmm, arg1: Xmm) -> Option<Xmm> {
     let pattern0_0 = arg0;
     let pattern1_0 = arg1;
-    // Rule at src/isa/x64/inst.isle line 2574.
+    // Rule at src/isa/x64/inst.isle line 2575.
     let expr0_0 = C::temp_writable_xmm(ctx);
     let expr1_0 = SseOpcode::Minsd;
     let expr2_0 = C::xmm_to_xmm_mem(ctx, pattern1_0);
@@ -4639,7 +4623,7 @@ pub fn constructor_x64_minsd<C: Context>(ctx: &mut C, arg0: Xmm, arg1: Xmm) -> O
 pub fn constructor_x64_minps<C: Context>(ctx: &mut C, arg0: Xmm, arg1: Xmm) -> Option<Xmm> {
     let pattern0_0 = arg0;
     let pattern1_0 = arg1;
-    // Rule at src/isa/x64/inst.isle line 2582.
+    // Rule at src/isa/x64/inst.isle line 2583.
     let expr0_0 = C::temp_writable_xmm(ctx);
     let expr1_0 = SseOpcode::Minps;
     let expr2_0 = C::xmm_to_xmm_mem(ctx, pattern1_0);
@@ -4658,7 +4642,7 @@ pub fn constructor_x64_minps<C: Context>(ctx: &mut C, arg0: Xmm, arg1: Xmm) -> O
 pub fn constructor_x64_minpd<C: Context>(ctx: &mut C, arg0: Xmm, arg1: Xmm) -> Option<Xmm> {
     let pattern0_0 = arg0;
     let pattern1_0 = arg1;
-    // Rule at src/isa/x64/inst.isle line 2589.
+    // Rule at src/isa/x64/inst.isle line 2590.
     let expr0_0 = C::temp_writable_xmm(ctx);
     let expr1_0 = SseOpcode::Minpd;
     let expr2_0 = C::xmm_to_xmm_mem(ctx, pattern1_0);
@@ -4677,7 +4661,7 @@ pub fn constructor_x64_minpd<C: Context>(ctx: &mut C, arg0: Xmm, arg1: Xmm) -> O
 pub fn constructor_x64_maxss<C: Context>(ctx: &mut C, arg0: Xmm, arg1: Xmm) -> Option<Xmm> {
     let pattern0_0 = arg0;
     let pattern1_0 = arg1;
-    // Rule at src/isa/x64/inst.isle line 2596.
+    // Rule at src/isa/x64/inst.isle line 2597.
     let expr0_0 = C::temp_writable_xmm(ctx);
     let expr1_0 = SseOpcode::Maxss;
     let expr2_0 = C::xmm_to_xmm_mem(ctx, pattern1_0);
@@ -4696,7 +4680,7 @@ pub fn constructor_x64_maxss<C: Context>(ctx: &mut C, arg0: Xmm, arg1: Xmm) -> O
 pub fn constructor_x64_maxsd<C: Context>(ctx: &mut C, arg0: Xmm, arg1: Xmm) -> Option<Xmm> {
     let pattern0_0 = arg0;
     let pattern1_0 = arg1;
-    // Rule at src/isa/x64/inst.isle line 2603.
+    // Rule at src/isa/x64/inst.isle line 2604.
     let expr0_0 = C::temp_writable_xmm(ctx);
     let expr1_0 = SseOpcode::Maxsd;
     let expr2_0 = C::xmm_to_xmm_mem(ctx, pattern1_0);
@@ -4715,7 +4699,7 @@ pub fn constructor_x64_maxsd<C: Context>(ctx: &mut C, arg0: Xmm, arg1: Xmm) -> O
 pub fn constructor_x64_maxps<C: Context>(ctx: &mut C, arg0: Xmm, arg1: Xmm) -> Option<Xmm> {
     let pattern0_0 = arg0;
     let pattern1_0 = arg1;
-    // Rule at src/isa/x64/inst.isle line 2610.
+    // Rule at src/isa/x64/inst.isle line 2611.
     let expr0_0 = C::temp_writable_xmm(ctx);
     let expr1_0 = SseOpcode::Maxps;
     let expr2_0 = C::xmm_to_xmm_mem(ctx, pattern1_0);
@@ -4734,7 +4718,7 @@ pub fn constructor_x64_maxps<C: Context>(ctx: &mut C, arg0: Xmm, arg1: Xmm) -> O
 pub fn constructor_x64_maxpd<C: Context>(ctx: &mut C, arg0: Xmm, arg1: Xmm) -> Option<Xmm> {
     let pattern0_0 = arg0;
     let pattern1_0 = arg1;
-    // Rule at src/isa/x64/inst.isle line 2617.
+    // Rule at src/isa/x64/inst.isle line 2618.
     let expr0_0 = C::temp_writable_xmm(ctx);
     let expr1_0 = SseOpcode::Maxpd;
     let expr2_0 = C::xmm_to_xmm_mem(ctx, pattern1_0);
@@ -4760,28 +4744,28 @@ pub fn constructor_x64_pcmpeq<C: Context>(
     if pattern0_0 == I8X16 {
         let pattern2_0 = arg1;
         let pattern3_0 = arg2;
-        // Rule at src/isa/x64/inst.isle line 2624.
+        // Rule at src/isa/x64/inst.isle line 2625.
         let expr0_0 = constructor_x64_pcmpeqb(ctx, pattern2_0, pattern3_0)?;
         return Some(expr0_0);
     }
     if pattern0_0 == I16X8 {
         let pattern2_0 = arg1;
         let pattern3_0 = arg2;
-        // Rule at src/isa/x64/inst.isle line 2625.
+        // Rule at src/isa/x64/inst.isle line 2626.
         let expr0_0 = constructor_x64_pcmpeqw(ctx, pattern2_0, pattern3_0)?;
         return Some(expr0_0);
     }
     if pattern0_0 == I32X4 {
         let pattern2_0 = arg1;
         let pattern3_0 = arg2;
-        // Rule at src/isa/x64/inst.isle line 2626.
+        // Rule at src/isa/x64/inst.isle line 2627.
         let expr0_0 = constructor_x64_pcmpeqd(ctx, pattern2_0, pattern3_0)?;
         return Some(expr0_0);
     }
     if pattern0_0 == I64X2 {
         let pattern2_0 = arg1;
         let pattern3_0 = arg2;
-        // Rule at src/isa/x64/inst.isle line 2627.
+        // Rule at src/isa/x64/inst.isle line 2628.
         let expr0_0 = constructor_x64_pcmpeqq(ctx, pattern2_0, pattern3_0)?;
         return Some(expr0_0);
     }
@@ -4792,7 +4776,7 @@ pub fn constructor_x64_pcmpeq<C: Context>(
 pub fn constructor_x64_pcmpeqb<C: Context>(ctx: &mut C, arg0: Xmm, arg1: &XmmMem) -> Option<Xmm> {
     let pattern0_0 = arg0;
     let pattern1_0 = arg1;
-    // Rule at src/isa/x64/inst.isle line 2630.
+    // Rule at src/isa/x64/inst.isle line 2631.
     let expr0_0: Type = I8X16;
     let expr1_0 = SseOpcode::Pcmpeqb;
     let expr2_0 = constructor_xmm_rm_r(ctx, expr0_0, &expr1_0, pattern0_0, pattern1_0)?;
@@ -4803,7 +4787,7 @@ pub fn constructor_x64_pcmpeqb<C: Context>(ctx: &mut C, arg0: Xmm, arg1: &XmmMem
 pub fn constructor_x64_pcmpeqw<C: Context>(ctx: &mut C, arg0: Xmm, arg1: &XmmMem) -> Option<Xmm> {
     let pattern0_0 = arg0;
     let pattern1_0 = arg1;
-    // Rule at src/isa/x64/inst.isle line 2632.
+    // Rule at src/isa/x64/inst.isle line 2633.
     let expr0_0: Type = I16X8;
     let expr1_0 = SseOpcode::Pcmpeqw;
     let expr2_0 = constructor_xmm_rm_r(ctx, expr0_0, &expr1_0, pattern0_0, pattern1_0)?;
@@ -4814,7 +4798,7 @@ pub fn constructor_x64_pcmpeqw<C: Context>(ctx: &mut C, arg0: Xmm, arg1: &XmmMem
 pub fn constructor_x64_pcmpeqd<C: Context>(ctx: &mut C, arg0: Xmm, arg1: &XmmMem) -> Option<Xmm> {
     let pattern0_0 = arg0;
     let pattern1_0 = arg1;
-    // Rule at src/isa/x64/inst.isle line 2634.
+    // Rule at src/isa/x64/inst.isle line 2635.
     let expr0_0: Type = I32X4;
     let expr1_0 = SseOpcode::Pcmpeqd;
     let expr2_0 = constructor_xmm_rm_r(ctx, expr0_0, &expr1_0, pattern0_0, pattern1_0)?;
@@ -4825,7 +4809,7 @@ pub fn constructor_x64_pcmpeqd<C: Context>(ctx: &mut C, arg0: Xmm, arg1: &XmmMem
 pub fn constructor_x64_pcmpeqq<C: Context>(ctx: &mut C, arg0: Xmm, arg1: &XmmMem) -> Option<Xmm> {
     let pattern0_0 = arg0;
     let pattern1_0 = arg1;
-    // Rule at src/isa/x64/inst.isle line 2636.
+    // Rule at src/isa/x64/inst.isle line 2637.
     let expr0_0: Type = I64X2;
     let expr1_0 = SseOpcode::Pcmpeqq;
     let expr2_0 = constructor_xmm_rm_r(ctx, expr0_0, &expr1_0, pattern0_0, pattern1_0)?;
@@ -4843,28 +4827,28 @@ pub fn constructor_x64_pcmpgt<C: Context>(
     if pattern0_0 == I8X16 {
         let pattern2_0 = arg1;
         let pattern3_0 = arg2;
-        // Rule at src/isa/x64/inst.isle line 2640.
+        // Rule at src/isa/x64/inst.isle line 2641.
         let expr0_0 = constructor_x64_pcmpgtb(ctx, pattern2_0, pattern3_0)?;
         return Some(expr0_0);
     }
     if pattern0_0 == I16X8 {
         let pattern2_0 = arg1;
         let pattern3_0 = arg2;
-        // Rule at src/isa/x64/inst.isle line 2641.
+        // Rule at src/isa/x64/inst.isle line 2642.
         let expr0_0 = constructor_x64_pcmpgtw(ctx, pattern2_0, pattern3_0)?;
         return Some(expr0_0);
     }
     if pattern0_0 == I32X4 {
         let pattern2_0 = arg1;
         let pattern3_0 = arg2;
-        // Rule at src/isa/x64/inst.isle line 2642.
+        // Rule at src/isa/x64/inst.isle line 2643.
        let expr0_0 = constructor_x64_pcmpgtd(ctx, pattern2_0, pattern3_0)?;
         return Some(expr0_0);
     }
     if pattern0_0 == I64X2 {
         let pattern2_0 = arg1;
         let pattern3_0 = arg2;
-        // Rule at src/isa/x64/inst.isle line 2643.
+        // Rule at src/isa/x64/inst.isle line 2644.
         let expr0_0 = constructor_x64_pcmpgtq(ctx, pattern2_0, pattern3_0)?;
         return Some(expr0_0);
     }
@@ -4875,7 +4859,7 @@ pub fn constructor_x64_pcmpgt<C: Context>(
 pub fn constructor_x64_pcmpgtb<C: Context>(ctx: &mut C, arg0: Xmm, arg1: &XmmMem) -> Option<Xmm> {
     let pattern0_0 = arg0;
     let pattern1_0 = arg1;
-    // Rule at src/isa/x64/inst.isle line 2646.
+    // Rule at src/isa/x64/inst.isle line 2647.
     let expr0_0: Type = I8X16;
     let expr1_0 = SseOpcode::Pcmpgtb;
     let expr2_0 = constructor_xmm_rm_r(ctx, expr0_0, &expr1_0, pattern0_0, pattern1_0)?;
@@ -4886,7 +4870,7 @@ pub fn constructor_x64_pcmpgtb<C: Context>(ctx: &mut C, arg0: Xmm, arg1: &XmmMem
 pub fn constructor_x64_pcmpgtw<C: Context>(ctx: &mut C, arg0: Xmm, arg1: &XmmMem) -> Option<Xmm> {
     let pattern0_0 = arg0;
     let pattern1_0 = arg1;
-    // Rule at src/isa/x64/inst.isle line 2648.
+    // Rule at src/isa/x64/inst.isle line 2649.
let expr0_0: Type = I16X8; let expr1_0 = SseOpcode::Pcmpgtw; let expr2_0 = constructor_xmm_rm_r(ctx, expr0_0, &expr1_0, pattern0_0, pattern1_0)?; @@ -4897,7 +4881,7 @@ pub fn constructor_x64_pcmpgtw(ctx: &mut C, arg0: Xmm, arg1: &XmmMem pub fn constructor_x64_pcmpgtd(ctx: &mut C, arg0: Xmm, arg1: &XmmMem) -> Option { let pattern0_0 = arg0; let pattern1_0 = arg1; - // Rule at src/isa/x64/inst.isle line 2650. + // Rule at src/isa/x64/inst.isle line 2651. let expr0_0: Type = I32X4; let expr1_0 = SseOpcode::Pcmpgtd; let expr2_0 = constructor_xmm_rm_r(ctx, expr0_0, &expr1_0, pattern0_0, pattern1_0)?; @@ -4908,7 +4892,7 @@ pub fn constructor_x64_pcmpgtd(ctx: &mut C, arg0: Xmm, arg1: &XmmMem pub fn constructor_x64_pcmpgtq(ctx: &mut C, arg0: Xmm, arg1: &XmmMem) -> Option { let pattern0_0 = arg0; let pattern1_0 = arg1; - // Rule at src/isa/x64/inst.isle line 2652. + // Rule at src/isa/x64/inst.isle line 2653. let expr0_0: Type = I64X2; let expr1_0 = SseOpcode::Pcmpgtq; let expr2_0 = constructor_xmm_rm_r(ctx, expr0_0, &expr1_0, pattern0_0, pattern1_0)?; @@ -4918,7 +4902,7 @@ pub fn constructor_x64_pcmpgtq(ctx: &mut C, arg0: Xmm, arg1: &XmmMem // Generated as internal constructor for term reg_to_xmm_mem. pub fn constructor_reg_to_xmm_mem(ctx: &mut C, arg0: Reg) -> Option { let pattern0_0 = arg0; - // Rule at src/isa/x64/inst.isle line 2708. + // Rule at src/isa/x64/inst.isle line 2709. let expr0_0 = C::xmm_new(ctx, pattern0_0); let expr1_0 = C::xmm_to_xmm_mem(ctx, expr0_0); return Some(expr1_0); @@ -4927,7 +4911,7 @@ pub fn constructor_reg_to_xmm_mem(ctx: &mut C, arg0: Reg) -> Option< // Generated as internal constructor for term xmm_to_reg_mem. pub fn constructor_xmm_to_reg_mem(ctx: &mut C, arg0: Reg) -> Option { let pattern0_0 = arg0; - // Rule at src/isa/x64/inst.isle line 2711. + // Rule at src/isa/x64/inst.isle line 2712. let expr0_0 = C::xmm_new(ctx, pattern0_0); let expr1_0 = C::xmm_to_reg(ctx, expr0_0); let expr2_0 = RegMem::Reg { reg: expr1_0 }; @@ -4941,7 +4925,7 @@ pub fn constructor_writable_gpr_to_r_reg( arg0: WritableGpr, ) -> Option { let pattern0_0 = arg0; - // Rule at src/isa/x64/inst.isle line 2715. + // Rule at src/isa/x64/inst.isle line 2716. let expr0_0 = C::writable_gpr_to_reg(ctx, pattern0_0); let expr1_0 = C::writable_reg_to_reg(ctx, expr0_0); return Some(expr1_0); @@ -4953,7 +4937,7 @@ pub fn constructor_writable_gpr_to_gpr_mem( arg0: WritableGpr, ) -> Option { let pattern0_0 = arg0; - // Rule at src/isa/x64/inst.isle line 2718. + // Rule at src/isa/x64/inst.isle line 2719. let expr0_0 = C::writable_gpr_to_gpr(ctx, pattern0_0); let expr1_0 = C::gpr_to_gpr_mem(ctx, expr0_0); return Some(expr1_0); @@ -4965,7 +4949,7 @@ pub fn constructor_writable_gpr_to_value_regs( arg0: WritableGpr, ) -> Option { let pattern0_0 = arg0; - // Rule at src/isa/x64/inst.isle line 2721. + // Rule at src/isa/x64/inst.isle line 2722. let expr0_0 = constructor_writable_gpr_to_r_reg(ctx, pattern0_0)?; let expr1_0 = C::value_reg(ctx, expr0_0); return Some(expr1_0); @@ -4977,7 +4961,7 @@ pub fn constructor_writable_xmm_to_r_reg( arg0: WritableXmm, ) -> Option { let pattern0_0 = arg0; - // Rule at src/isa/x64/inst.isle line 2724. + // Rule at src/isa/x64/inst.isle line 2725. let expr0_0 = C::writable_xmm_to_reg(ctx, pattern0_0); let expr1_0 = C::writable_reg_to_reg(ctx, expr0_0); return Some(expr1_0); @@ -4989,7 +4973,7 @@ pub fn constructor_writable_xmm_to_xmm_mem( arg0: WritableXmm, ) -> Option { let pattern0_0 = arg0; - // Rule at src/isa/x64/inst.isle line 2727. 
+ // Rule at src/isa/x64/inst.isle line 2728. let expr0_0 = C::writable_xmm_to_xmm(ctx, pattern0_0); let expr1_0 = C::xmm_to_xmm_mem(ctx, expr0_0); return Some(expr1_0); @@ -5001,7 +4985,7 @@ pub fn constructor_writable_xmm_to_value_regs( arg0: WritableXmm, ) -> Option { let pattern0_0 = arg0; - // Rule at src/isa/x64/inst.isle line 2730. + // Rule at src/isa/x64/inst.isle line 2731. let expr0_0 = constructor_writable_xmm_to_r_reg(ctx, pattern0_0)?; let expr1_0 = C::value_reg(ctx, expr0_0); return Some(expr1_0); @@ -5013,7 +4997,7 @@ pub fn constructor_synthetic_amode_to_gpr_mem( arg0: &SyntheticAmode, ) -> Option { let pattern0_0 = arg0; - // Rule at src/isa/x64/inst.isle line 2737. + // Rule at src/isa/x64/inst.isle line 2738. let expr0_0 = C::synthetic_amode_to_reg_mem(ctx, pattern0_0); let expr1_0 = C::reg_mem_to_gpr_mem(ctx, &expr0_0); return Some(expr1_0); @@ -5022,7 +5006,7 @@ pub fn constructor_synthetic_amode_to_gpr_mem( // Generated as internal constructor for term amode_to_gpr_mem. pub fn constructor_amode_to_gpr_mem(ctx: &mut C, arg0: &Amode) -> Option { let pattern0_0 = arg0; - // Rule at src/isa/x64/inst.isle line 2735. + // Rule at src/isa/x64/inst.isle line 2736. let expr0_0 = C::amode_to_synthetic_amode(ctx, pattern0_0); let expr1_0 = constructor_synthetic_amode_to_gpr_mem(ctx, &expr0_0)?; return Some(expr1_0); @@ -5031,7 +5015,7 @@ pub fn constructor_amode_to_gpr_mem(ctx: &mut C, arg0: &Amode) -> Op // Generated as internal constructor for term amode_to_xmm_mem. pub fn constructor_amode_to_xmm_mem(ctx: &mut C, arg0: &Amode) -> Option { let pattern0_0 = arg0; - // Rule at src/isa/x64/inst.isle line 2740. + // Rule at src/isa/x64/inst.isle line 2741. let expr0_0 = C::amode_to_synthetic_amode(ctx, pattern0_0); let expr1_0 = constructor_synthetic_amode_to_xmm_mem(ctx, &expr0_0)?; return Some(expr1_0); @@ -5043,7 +5027,7 @@ pub fn constructor_synthetic_amode_to_xmm_mem( arg0: &SyntheticAmode, ) -> Option { let pattern0_0 = arg0; - // Rule at src/isa/x64/inst.isle line 2743. + // Rule at src/isa/x64/inst.isle line 2744. let expr0_0 = C::synthetic_amode_to_reg_mem(ctx, pattern0_0); let expr1_0 = C::reg_mem_to_xmm_mem(ctx, &expr0_0); return Some(expr1_0); @@ -5228,13 +5212,13 @@ pub fn constructor_lower(ctx: &mut C, arg0: Inst) -> Option { // Rule at src/isa/x64/lower.isle line 1434. let expr0_0 = constructor_x64_ud2(ctx, pattern2_1)?; - let expr1_0 = constructor_safepoint(ctx, &expr0_0)?; + let expr1_0 = constructor_side_effect(ctx, &expr0_0)?; return Some(expr1_0); } &Opcode::ResumableTrap => { // Rule at src/isa/x64/lower.isle line 1439. 
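// `trap` and `resumable_trap` previously lowered through a dedicated
// `safepoint` constructor so the old register allocator could be told where
// a stack map was needed. regalloc2 tracks reference-typed vregs itself (see
// the removal of `InstIsSafepoint` and `emit_safepoint` later in this diff),
// so the `ud2` is now wrapped in the ordinary `side_effect` constructor.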
let expr0_0 = constructor_x64_ud2(ctx, pattern2_1)?; - let expr1_0 = constructor_safepoint(ctx, &expr0_0)?; + let expr1_0 = constructor_side_effect(ctx, &expr0_0)?; return Some(expr1_0); } _ => {} diff --git a/cranelift/codegen/src/isa/x64/mod.rs b/cranelift/codegen/src/isa/x64/mod.rs index a86ffc34fb..6e8256f075 100644 --- a/cranelift/codegen/src/isa/x64/mod.rs +++ b/cranelift/codegen/src/isa/x64/mod.rs @@ -6,8 +6,9 @@ use super::TargetIsa; use crate::ir::{condcodes::IntCC, Function}; #[cfg(feature = "unwind")] use crate::isa::unwind::systemv; -use crate::isa::x64::{inst::regs::create_reg_universe_systemv, settings as x64_settings}; +use crate::isa::x64::{inst::regs::create_reg_env_systemv, settings as x64_settings}; use crate::isa::Builder as IsaBuilder; +use crate::machinst::Reg; use crate::machinst::{ compile, MachCompileResult, MachTextSectionBuilder, TextSectionBuilder, VCode, }; @@ -15,8 +16,7 @@ use crate::result::{CodegenError, CodegenResult}; use crate::settings::{self as shared_settings, Flags}; use alloc::{boxed::Box, vec::Vec}; use core::fmt; - -use regalloc::{PrettyPrint, RealRegUniverse, Reg}; +use regalloc2::MachineEnv; use target_lexicon::Triple; mod abi; @@ -30,27 +30,31 @@ pub(crate) struct X64Backend { triple: Triple, flags: Flags, x64_flags: x64_settings::Flags, - reg_universe: RealRegUniverse, + reg_env: MachineEnv, } impl X64Backend { /// Create a new X64 backend with the given (shared) flags. fn new_with_flags(triple: Triple, flags: Flags, x64_flags: x64_settings::Flags) -> Self { - let reg_universe = create_reg_universe_systemv(&flags); + let reg_env = create_reg_env_systemv(&flags); Self { triple, flags, x64_flags, - reg_universe, + reg_env, } } - fn compile_vcode(&self, func: &Function, flags: Flags) -> CodegenResult> { + fn compile_vcode( + &self, + func: &Function, + flags: Flags, + ) -> CodegenResult<(VCode, regalloc2::Output)> { // This performs lowering to VCode, register-allocates the code, computes // block layout and finalizes branches. The result is ready for binary emission. 
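// A sketch of the resulting two-phase flow, using the signatures that appear
// in this diff (register allocation no longer rewrites the instruction
// stream in place; `emit` consults the `regalloc2::Output` while producing
// machine code):
//
//     let (vcode, regalloc_result) = self.compile_vcode(func, flags.clone())?;
//     let emit_result = vcode.emit(&regalloc_result, want_disasm, flags.machine_code_cfg_info());
//     let code = emit_result.buffer.finish();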
let emit_info = EmitInfo::new(flags.clone(), self.x64_flags.clone()); let abi = Box::new(abi::X64ABICallee::new(&func, flags, self.isa_flags())?); - compile::compile::(&func, self, abi, &self.reg_universe, emit_info) + compile::compile::(&func, self, abi, &self.reg_env, emit_info) } } @@ -61,28 +65,27 @@ impl TargetIsa for X64Backend { want_disasm: bool, ) -> CodegenResult { let flags = self.flags(); - let vcode = self.compile_vcode(func, flags.clone())?; + let (vcode, regalloc_result) = self.compile_vcode(func, flags.clone())?; - let (buffer, bb_starts, bb_edges) = vcode.emit(); - let buffer = buffer.finish(); - let frame_size = vcode.frame_size(); - let value_labels_ranges = vcode.value_labels_ranges(); - let stackslot_offsets = vcode.stackslot_offsets().clone(); + let want_disasm = want_disasm || log::log_enabled!(log::Level::Debug); + let emit_result = vcode.emit(®alloc_result, want_disasm, flags.machine_code_cfg_info()); + let frame_size = emit_result.frame_size; + let value_labels_ranges = emit_result.value_labels_ranges; + let buffer = emit_result.buffer.finish(); + let stackslot_offsets = emit_result.stackslot_offsets; - let disasm = if want_disasm { - Some(vcode.show_rru(Some(&create_reg_universe_systemv(flags)))) - } else { - None - }; + if let Some(disasm) = emit_result.disasm.as_ref() { + log::debug!("disassembly:\n{}", disasm); + } Ok(MachCompileResult { buffer, frame_size, - disasm, + disasm: emit_result.disasm, value_labels_ranges, stackslot_offsets, - bb_starts, - bb_edges, + bb_starts: emit_result.bb_offsets, + bb_edges: emit_result.bb_edges, }) } @@ -319,30 +322,29 @@ mod test { // 00000000 55 push rbp // 00000001 4889E5 mov rbp,rsp - // 00000004 4889FE mov rsi,rdi - // 00000007 81C634120000 add esi,0x1234 - // 0000000D 85F6 test esi,esi - // 0000000F 0F841B000000 jz near 0x30 - // 00000015 4889F7 mov rdi,rsi - // 00000018 4889F0 mov rax,rsi - // 0000001B 81E834120000 sub eax,0x1234 - // 00000021 01F8 add eax,edi - // 00000023 85F6 test esi,esi - // 00000025 0F8505000000 jnz near 0x30 - // 0000002B 4889EC mov rsp,rbp - // 0000002E 5D pop rbp - // 0000002F C3 ret - // 00000030 4889F7 mov rdi,rsi <--- cold block - // 00000033 81C734120000 add edi,0x1234 - // 00000039 85FF test edi,edi - // 0000003B 0F85EFFFFFFF jnz near 0x30 - // 00000041 E9D2FFFFFF jmp 0x18 + // 00000004 81C734120000 add edi,0x1234 + // 0000000A 85FF test edi,edi + // 0000000C 0F841C000000 jz near 0x2e + // 00000012 4989F8 mov r8,rdi + // 00000015 4889F8 mov rax,rdi + // 00000018 81E834120000 sub eax,0x1234 + // 0000001E 4401C0 add eax,r8d + // 00000021 85FF test edi,edi + // 00000023 0F8505000000 jnz near 0x2e + // 00000029 4889EC mov rsp,rbp + // 0000002C 5D pop rbp + // 0000002D C3 ret + // 0000002E 4989F8 mov r8,rdi + // 00000031 4181C034120000 add r8d,0x1234 + // 00000038 4585C0 test r8d,r8d + // 0000003B 0F85EDFFFFFF jnz near 0x2e + // 00000041 E9CFFFFFFF jmp 0x15 let golden = vec![ - 85, 72, 137, 229, 72, 137, 254, 129, 198, 52, 18, 0, 0, 133, 246, 15, 132, 27, 0, 0, 0, - 72, 137, 247, 72, 137, 240, 129, 232, 52, 18, 0, 0, 1, 248, 133, 246, 15, 133, 5, 0, 0, - 0, 72, 137, 236, 93, 195, 72, 137, 247, 129, 199, 52, 18, 0, 0, 133, 255, 15, 133, 239, - 255, 255, 255, 233, 210, 255, 255, 255, + 85, 72, 137, 229, 129, 199, 52, 18, 0, 0, 133, 255, 15, 132, 28, 0, 0, 0, 73, 137, 248, + 72, 137, 248, 129, 232, 52, 18, 0, 0, 68, 1, 192, 133, 255, 15, 133, 5, 0, 0, 0, 72, + 137, 236, 93, 195, 73, 137, 248, 65, 129, 192, 52, 18, 0, 0, 69, 133, 192, 15, 133, + 237, 255, 255, 255, 233, 207, 255, 255, 255, ]; 
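// The golden bytes above differ from the previous expectation only in
// register choice: regalloc2 picks rdi/r8 where regalloc.rs picked rsi/rdi.
// The code shape is unchanged, including the cold block that is still placed
// at the end of the function (at 0x2e here, 0x30 before).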
assert_eq!(code, &golden[..]); diff --git a/cranelift/codegen/src/lib.rs b/cranelift/codegen/src/lib.rs index 3d5ca20035..2cd498f9c1 100644 --- a/cranelift/codegen/src/lib.rs +++ b/cranelift/codegen/src/lib.rs @@ -101,7 +101,6 @@ mod inst_predicates; mod iterators; mod legalizer; mod licm; -mod log; mod nan_canonicalization; mod remove_constant_phis; mod result; diff --git a/cranelift/codegen/src/log.rs b/cranelift/codegen/src/log.rs deleted file mode 100644 index c5bd59aa58..0000000000 --- a/cranelift/codegen/src/log.rs +++ /dev/null @@ -1,39 +0,0 @@ -//! This module implements deferred display helpers. -//! -//! These are particularly useful in logging contexts, where the maximum logging level filter might -//! be enabled, but we don't want the arguments to be evaluated early: -//! -//! ``` -//! log::set_max_level(log::LevelFilter::max()); -//! fn expensive_calculation() -> String { -//! "a string that is very slow to generate".into() -//! } -//! log::debug!("{}", expensive_calculation()); -//! ``` -//! -//! If the associated log implementation filters out log debug entries, the expensive calculation -//! would have been spurious. In this case, we can wrap the expensive computation within an -//! `DeferredDisplay`, so that the computation only happens when the actual `fmt` function is -//! called. - -use core::fmt; - -pub(crate) struct DeferredDisplay(F); - -impl T, T: fmt::Display> DeferredDisplay { - pub(crate) fn new(f: F) -> Self { - Self(f) - } -} - -impl T, T: fmt::Display> fmt::Display for DeferredDisplay { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - self.0().fmt(f) - } -} - -impl T, T: fmt::Debug> fmt::Debug for DeferredDisplay { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - self.0().fmt(f) - } -} diff --git a/cranelift/codegen/src/machinst/abi.rs b/cranelift/codegen/src/machinst/abi.rs index d32993d1ce..4d7442b670 100644 --- a/cranelift/codegen/src/machinst/abi.rs +++ b/cranelift/codegen/src/machinst/abi.rs @@ -5,7 +5,6 @@ use crate::ir::{Signature, StackSlot}; use crate::isa::CallConv; use crate::machinst::*; use crate::settings; -use regalloc::{Reg, Set, SpillSlot, Writable}; use smallvec::SmallVec; /// A small vector of instructions (with some reasonable size); appropriate for @@ -42,12 +41,6 @@ pub trait ABICallee { /// Get the calling convention implemented by this ABI object. fn call_conv(&self) -> CallConv; - /// Get the liveins of the function. - fn liveins(&self) -> Set; - - /// Get the liveouts of the function. - fn liveouts(&self) -> Set; - /// Number of arguments. fn num_args(&self) -> usize; @@ -106,7 +99,7 @@ pub trait ABICallee { fn set_num_spillslots(&mut self, slots: usize); /// Update with the clobbered registers, post-regalloc. - fn set_clobbered(&mut self, clobbered: Set>); + fn set_clobbered(&mut self, clobbered: Vec>); /// Get the address of a stackslot. 
fn stackslot_addr(&self, slot: StackSlot, offset: u32, into_reg: Writable) -> Self::I; diff --git a/cranelift/codegen/src/machinst/abi_impl.rs b/cranelift/codegen/src/machinst/abi_impl.rs index 065ee7074c..58648ad9f6 100644 --- a/cranelift/codegen/src/machinst/abi_impl.rs +++ b/cranelift/codegen/src/machinst/abi_impl.rs @@ -125,6 +125,7 @@ use super::abi::*; use crate::binemit::StackMap; +use crate::fx::FxHashSet; use crate::ir::types::*; use crate::ir::{ArgumentExtension, ArgumentPurpose, StackSlot}; use crate::machinst::*; @@ -132,7 +133,6 @@ use crate::settings; use crate::CodegenResult; use crate::{ir, isa}; use alloc::vec::Vec; -use regalloc::{RealReg, Reg, RegClass, Set, SpillSlot, Writable}; use smallvec::{smallvec, SmallVec}; use std::convert::TryFrom; use std::marker::PhantomData; @@ -257,16 +257,6 @@ pub enum ArgsOrRets { Rets, } -/// Is an instruction returned by an ABI machine-specific backend a safepoint, -/// or not? -#[derive(Clone, Copy, Debug, PartialEq, Eq)] -pub enum InstIsSafepoint { - /// The instruction is a safepoint. - Yes, - /// The instruction is not a safepoint. - No, -} - /// Abstract location for a machine-specific ABI impl to translate into the /// appropriate addressing mode. #[derive(Clone, Copy, Debug)] @@ -319,11 +309,7 @@ pub trait ABIMachineSpec { /// Returns word register class. fn word_reg_class() -> RegClass { - match Self::word_bits() { - 32 => RegClass::I32, - 64 => RegClass::I64, - _ => unreachable!(), - } + RegClass::Int } /// Returns required stack alignment in bytes. @@ -366,7 +352,7 @@ pub trait ABIMachineSpec { ) -> Self::I; /// Generate a return instruction. - fn gen_ret() -> Self::I; + fn gen_ret(rets: Vec) -> Self::I; /// Generate an "epilogue placeholder" instruction, recognized by lowering /// when using the Baldrdash ABI. @@ -442,7 +428,7 @@ pub trait ABIMachineSpec { /// contains the registers in a sorted order. fn get_clobbered_callee_saves( call_conv: isa::CallConv, - regs: &Set>, + regs: &[Writable], ) -> Vec>; /// Determine whether it is necessary to generate the usual frame-setup @@ -466,7 +452,7 @@ pub trait ABIMachineSpec { call_conv: isa::CallConv, setup_frame: bool, flags: &settings::Flags, - clobbered_callee_saves: &Vec>, + clobbered_callee_saves: &[Writable], fixed_frame_storage_size: u32, outgoing_args_size: u32, ) -> (u64, SmallVec<[Self::I; 16]>); @@ -478,7 +464,7 @@ pub trait ABIMachineSpec { fn gen_clobber_restore( call_conv: isa::CallConv, flags: &settings::Flags, - clobbers: &Set>, + clobbers: &[Writable], fixed_frame_storage_size: u32, outgoing_args_size: u32, ) -> SmallVec<[Self::I; 16]>; @@ -493,7 +479,7 @@ pub trait ABIMachineSpec { tmp: Writable, callee_conv: isa::CallConv, callee_conv: isa::CallConv, - ) -> SmallVec<[(InstIsSafepoint, Self::I); 2]>; + ) -> SmallVec<[Self::I; 2]>; /// Generate a memcpy invocation. Used to set up struct args. May clobber /// caller-save registers; we only memcpy before we start to set up args for @@ -530,6 +516,7 @@ pub trait ABIMachineSpec { } /// ABI information shared between body (callee) and caller. +#[derive(Clone)] struct ABISig { /// Argument locations (regs or stack slots). Stack offsets are relative to /// SP on entry to function. @@ -604,7 +591,7 @@ pub struct ABICalleeImpl { /// Stack size to be reserved for outgoing arguments. outgoing_args_size: u32, /// Clobbered registers, from regalloc. - clobbered: Set>, + clobbered: Vec>, /// Total number of spillslots, from regalloc. spillslots: Option, /// Storage allocated for the fixed part of the stack frame. 
This is @@ -655,24 +642,13 @@ fn get_special_purpose_param_register( let idx = f.signature.special_param_index(purpose)?; match &abi.args[idx] { &ABIArg::Slots { ref slots, .. } => match &slots[0] { - &ABIArgSlot::Reg { reg, .. } => Some(reg.to_reg()), + &ABIArgSlot::Reg { reg, .. } => Some(reg.into()), _ => None, }, _ => None, } } -fn ty_from_class(class: RegClass) -> Type { - match class { - RegClass::I32 => I32, - RegClass::I64 => I64, - RegClass::F32 => F32, - RegClass::F64 => F64, - RegClass::V128 => I8X16, - _ => panic!("Unknown regclass: {:?}", class), - } -} - impl ABICalleeImpl { /// Create a new body ABI instance. pub fn new( @@ -739,7 +715,7 @@ impl ABICalleeImpl { stackslots, stackslots_size: stack_offset, outgoing_args_size: 0, - clobbered: Set::empty(), + clobbered: vec![], spillslots: None, fixed_frame_storage_size: 0, total_frame_size: None, @@ -961,34 +937,6 @@ impl ABICallee for ABICalleeImpl { self.sig.call_conv } - fn liveins(&self) -> Set { - let mut set: Set = Set::empty(); - for arg in &self.sig.args { - if let &ABIArg::Slots { ref slots, .. } = arg { - for slot in slots { - if let ABIArgSlot::Reg { reg, .. } = slot { - set.insert(*reg); - } - } - } - } - set - } - - fn liveouts(&self) -> Set { - let mut set: Set = Set::empty(); - for ret in &self.sig.rets { - if let &ABIArg::Slots { ref slots, .. } = ret { - for slot in slots { - if let ABIArgSlot::Reg { reg, .. } = slot { - set.insert(*reg); - } - } - } - } - set - } - fn num_args(&self) -> usize { self.sig.args.len() } @@ -1019,7 +967,7 @@ impl ABICallee for ABICalleeImpl { // Extension mode doesn't matter (we're copying out, not in; we // ignore high bits by convention). &ABIArgSlot::Reg { reg, ty, .. } => { - insts.push(M::gen_move(*into_reg, reg.to_reg(), ty)); + insts.push(M::gen_move(*into_reg, reg.into(), ty)); } &ABIArgSlot::Stack { offset, ty, .. } => { insts.push(M::gen_load_stack( @@ -1069,20 +1017,21 @@ impl ABICallee for ABICalleeImpl { match &self.sig.rets[idx] { &ABIArg::Slots { ref slots, .. } => { assert_eq!(from_regs.len(), slots.len()); - for (slot, from_reg) in slots.iter().zip(from_regs.regs().iter()) { + for (slot, &from_reg) in slots.iter().zip(from_regs.regs().iter()) { match slot { &ABIArgSlot::Reg { reg, ty, extension, .. } => { let from_bits = ty_bits(ty) as u8; let ext = M::get_ext_mode(self.sig.call_conv, extension); + let reg: Writable = Writable::from_reg(Reg::from(reg)); match (ext, from_bits) { (ArgumentExtension::Uext, n) | (ArgumentExtension::Sext, n) if n < word_bits => { let signed = ext == ArgumentExtension::Sext; ret.push(M::gen_extend( - Writable::from_reg(reg.to_reg()), + reg, from_reg.to_reg(), signed, from_bits, @@ -1090,11 +1039,7 @@ impl ABICallee for ABICalleeImpl { )); } _ => { - ret.push(M::gen_move( - Writable::from_reg(reg.to_reg()), - from_reg.to_reg(), - ty, - )); + ret.push(M::gen_move(reg, from_reg.to_reg(), ty)); } }; } @@ -1118,7 +1063,7 @@ impl ABICallee for ABICalleeImpl { (ArgumentExtension::Uext, n) | (ArgumentExtension::Sext, n) if n < word_bits => { - assert_eq!(M::word_reg_class(), from_reg.to_reg().get_class()); + assert_eq!(M::word_reg_class(), from_reg.to_reg().class()); let signed = ext == ArgumentExtension::Sext; ret.push(M::gen_extend( Writable::from_reg(from_reg.to_reg()), @@ -1166,7 +1111,22 @@ impl ABICallee for ABICalleeImpl { } fn gen_ret(&self) -> Self::I { - M::gen_ret() + let mut rets = vec![]; + for ret in &self.sig.rets { + match ret { + ABIArg::Slots { slots, .. } => { + for slot in slots { + match slot { + ABIArgSlot::Reg { reg, .. 
} => rets.push(Reg::from(*reg)), + _ => {} + } + } + } + _ => {} + } + } + + M::gen_ret(rets) } fn gen_epilogue_placeholder(&self) -> Self::I { @@ -1177,7 +1137,7 @@ impl ABICallee for ABICalleeImpl { self.spillslots = Some(slots); } - fn set_clobbered(&mut self, clobbered: Set>) { + fn set_clobbered(&mut self, clobbered: Vec>) { self.clobbered = clobbered; } @@ -1198,7 +1158,7 @@ impl ABICallee for ABICalleeImpl { into_regs: ValueRegs>, ) -> SmallInstVec { // Offset from beginning of spillslot area, which is at nominal SP + stackslots_size. - let islot = slot.get() as i64; + let islot = slot.index() as i64; let spill_off = islot * M::word_bytes() as i64; let sp_off = self.stackslots_size as i64 + spill_off; log::trace!("load_spillslot: slot {:?} -> sp_off {}", slot, sp_off); @@ -1214,7 +1174,7 @@ impl ABICallee for ABICalleeImpl { from_regs: ValueRegs, ) -> SmallInstVec { // Offset from beginning of spillslot area, which is at nominal SP + stackslots_size. - let islot = slot.get() as i64; + let islot = slot.index() as i64; let spill_off = islot * M::word_bytes() as i64; let sp_off = self.stackslots_size as i64 + spill_off; log::trace!("store_spillslot: slot {:?} -> sp_off {}", slot, sp_off); @@ -1245,7 +1205,7 @@ impl ABICallee for ABICalleeImpl { let first_spillslot_word = ((self.stackslots_size + virtual_sp_offset as u32) / bytes) as usize; for &slot in slots { - let slot = slot.get() as usize; + let slot = slot.index(); bits[first_spillslot_word + slot] = true; } @@ -1347,7 +1307,10 @@ impl ABICallee for ABICalleeImpl { insts.extend(M::gen_epilogue_frame_restore(&self.flags)); } - insts.push(M::gen_ret()); + // This `ret` doesn't need any return registers attached + // because we are post-regalloc and don't need to + // represent the implicit uses anymore. + insts.push(M::gen_ret(vec![])); } log::trace!("Epilogue: {:?}", insts); @@ -1368,19 +1331,19 @@ impl ABICallee for ABICalleeImpl { } fn gen_spill(&self, to_slot: SpillSlot, from_reg: RealReg) -> Self::I { - let ty = ty_from_class(from_reg.to_reg().get_class()); - self.store_spillslot(to_slot, ty, ValueRegs::one(from_reg.to_reg())) + let ty = Self::I::canonical_type_for_rc(Reg::from(from_reg).class()); + self.store_spillslot(to_slot, ty, ValueRegs::one(Reg::from(from_reg))) .into_iter() .next() .unwrap() } fn gen_reload(&self, to_reg: Writable, from_slot: SpillSlot) -> Self::I { - let ty = ty_from_class(to_reg.to_reg().get_class()); + let ty = Self::I::canonical_type_for_rc(to_reg.to_reg().class()); self.load_spillslot( from_slot, ty, - writable_value_regs(ValueRegs::one(to_reg.to_reg().to_reg())), + writable_value_regs(ValueRegs::one(Reg::from(to_reg.to_reg()))), ) .into_iter() .next() @@ -1390,13 +1353,13 @@ impl ABICallee for ABICalleeImpl { fn abisig_to_uses_and_defs(sig: &ABISig) -> (Vec, Vec>) { // Compute uses: all arg regs. - let mut uses = Vec::new(); + let mut uses = FxHashSet::default(); for arg in &sig.args { if let &ABIArg::Slots { ref slots, .. } = arg { for slot in slots { match slot { &ABIArgSlot::Reg { reg, .. } => { - uses.push(reg.to_reg()); + uses.insert(Reg::from(reg)); } _ => {} } @@ -1405,13 +1368,15 @@ fn abisig_to_uses_and_defs(sig: &ABISig) -> (Vec, Vec = M::get_regs_clobbered_by_call(sig.call_conv) + .into_iter() + .collect(); for ret in &sig.rets { if let &ABIArg::Slots { ref slots, .. } = ret { for slot in slots { match slot { &ABIArgSlot::Reg { reg, .. 
} => { - defs.push(Writable::from_reg(reg.to_reg())); + defs.insert(Writable::from_reg(Reg::from(reg))); } _ => {} } @@ -1419,6 +1384,11 @@ fn abisig_to_uses_and_defs(sig: &ABISig) -> (Vec, Vec>(); + let mut defs = defs.into_iter().collect::>(); + uses.sort_unstable(); + defs.sort_unstable(); + (uses, defs) } @@ -1567,14 +1537,14 @@ impl ABICaller for ABICallerImpl { } => { let ext = M::get_ext_mode(self.sig.call_conv, extension); if ext != ir::ArgumentExtension::None && ty_bits(ty) < word_bits { - assert_eq!(word_rc, reg.get_class()); + assert_eq!(word_rc, reg.class()); let signed = match ext { ir::ArgumentExtension::Uext => false, ir::ArgumentExtension::Sext => true, _ => unreachable!(), }; ctx.emit(M::gen_extend( - Writable::from_reg(reg.to_reg()), + Writable::from_reg(Reg::from(reg)), *from_reg, signed, ty_bits(ty) as u8, @@ -1582,7 +1552,7 @@ impl ABICaller for ABICallerImpl { )); } else { ctx.emit(M::gen_move( - Writable::from_reg(reg.to_reg()), + Writable::from_reg(Reg::from(reg)), *from_reg, ty, )); @@ -1597,7 +1567,7 @@ impl ABICaller for ABICallerImpl { let mut ty = ty; let ext = M::get_ext_mode(self.sig.call_conv, extension); if ext != ir::ArgumentExtension::None && ty_bits(ty) < word_bits { - assert_eq!(word_rc, from_reg.get_class()); + assert_eq!(word_rc, from_reg.class()); let signed = match ext { ir::ArgumentExtension::Uext => false, ir::ArgumentExtension::Sext => true, @@ -1680,7 +1650,7 @@ impl ABICaller for ABICallerImpl { // Extension mode doesn't matter because we're copying out, not in, // and we ignore high bits in our own registers by convention. &ABIArgSlot::Reg { reg, ty, .. } => { - ctx.emit(M::gen_move(*into_reg, reg.to_reg(), ty)); + ctx.emit(M::gen_move(*into_reg, Reg::from(reg), ty)); } &ABIArgSlot::Stack { offset, ty, .. } => { let ret_area_base = self.sig.stack_arg_space; @@ -1716,7 +1686,7 @@ impl ABICaller for ABICallerImpl { self.emit_copy_regs_to_arg(ctx, i, ValueRegs::one(rd.to_reg())); } let tmp = ctx.alloc_tmp(word_type).only_reg().unwrap(); - for (is_safepoint, inst) in M::gen_call( + for inst in M::gen_call( &self.dest, uses, defs, @@ -1727,10 +1697,7 @@ impl ABICaller for ABICallerImpl { ) .into_iter() { - match is_safepoint { - InstIsSafepoint::Yes => ctx.emit_safepoint(inst), - InstIsSafepoint::No => ctx.emit(inst), - } + ctx.emit(inst); } } } diff --git a/cranelift/codegen/src/machinst/blockorder.rs b/cranelift/codegen/src/machinst/blockorder.rs index 5c805c4029..718abcd229 100644 --- a/cranelift/codegen/src/machinst/blockorder.rs +++ b/cranelift/codegen/src/machinst/blockorder.rs @@ -127,6 +127,9 @@ pub enum LoweredBlock { /// to the next, i.e., corresponding to the included edge-block. This /// will be an instruction in `block`. edge_inst: Inst, + /// The successor index in this edge, to distinguish multiple + /// edges between the same block pair. + succ_idx: usize, /// The successor CLIF block. succ: Block, }, @@ -138,6 +141,9 @@ pub enum LoweredBlock { /// The edge (jump) instruction corresponding to the included /// edge-block. This will be an instruction in `pred`. edge_inst: Inst, + /// The successor index in this edge, to distinguish multiple + /// edges between the same block pair. + succ_idx: usize, /// The original CLIF block included in this lowered block. block: Block, }, @@ -150,6 +156,9 @@ pub enum LoweredBlock { /// The edge (jump) instruction corresponding to this edge's transition. /// This will be an instruction in `pred`. 
edge_inst: Inst, + /// The successor index in this edge, to distinguish multiple + /// edges between the same block pair. + succ_idx: usize, /// The successor CLIF block. succ: Block, }, @@ -168,29 +177,34 @@ impl LoweredBlock { } /// The associated in-edge, if any. + #[cfg(test)] pub fn in_edge(self) -> Option<(Block, Inst, Block)> { match self { LoweredBlock::EdgeAndOrig { pred, edge_inst, block, + .. } => Some((pred, edge_inst, block)), _ => None, } } /// the associated out-edge, if any. Also includes edge-only blocks. + #[cfg(test)] pub fn out_edge(self) -> Option<(Block, Inst, Block)> { match self { LoweredBlock::OrigAndEdge { block, edge_inst, succ, + .. } => Some((block, edge_inst, succ)), LoweredBlock::Edge { pred, edge_inst, succ, + .. } => Some((pred, edge_inst, succ)), _ => None, } @@ -207,15 +221,17 @@ impl BlockLoweringOrder { let mut block_out_count = SecondaryMap::with_default(0); // Cache the block successors to avoid re-examining branches below. - let mut block_succs: SmallVec<[(Inst, Block); 128]> = SmallVec::new(); + let mut block_succs: SmallVec<[(Inst, usize, Block); 128]> = SmallVec::new(); let mut block_succ_range = SecondaryMap::with_default((0, 0)); let mut fallthrough_return_block = None; for block in f.layout.blocks() { let block_succ_start = block_succs.len(); + let mut succ_idx = 0; visit_block_succs(f, block, |inst, succ| { block_out_count[block] += 1; block_in_count[succ] += 1; - block_succs.push((inst, succ)); + block_succs.push((inst, succ_idx, succ)); + succ_idx += 1; }); let block_succ_end = block_succs.len(); block_succ_range[block] = (block_succ_start, block_succ_end); @@ -262,13 +278,14 @@ impl BlockLoweringOrder { // At an orig block; successors are always edge blocks, // possibly with orig blocks following. let range = block_succ_range[block]; - for &(edge_inst, succ) in &block_succs[range.0..range.1] { + for &(edge_inst, succ_idx, succ) in &block_succs[range.0..range.1] { if block_in_count[succ] == 1 { ret.push(( edge_inst, LoweredBlock::EdgeAndOrig { pred: block, edge_inst, + succ_idx, block: succ, }, )); @@ -278,6 +295,7 @@ impl BlockLoweringOrder { LoweredBlock::Edge { pred: block, edge_inst, + succ_idx, succ, }, )); @@ -298,12 +316,13 @@ impl BlockLoweringOrder { // implicit return succ). if range.1 - range.0 > 0 { debug_assert!(range.1 - range.0 == 1); - let (succ_edge_inst, succ_succ) = block_succs[range.0]; + let (succ_edge_inst, succ_succ_idx, succ_succ) = block_succs[range.0]; ret.push(( edge_inst, LoweredBlock::OrigAndEdge { block: succ, edge_inst: succ_edge_inst, + succ_idx: succ_succ_idx, succ: succ_succ, }, )); @@ -395,7 +414,7 @@ impl BlockLoweringOrder { let mut lowered_succ_ranges = vec![]; let mut lb_to_bindex = FxHashMap::default(); for (block, succ_range) in rpo.into_iter() { - let index = lowered_order.len() as BlockIndex; + let index = BlockIndex::new(lowered_order.len()); lb_to_bindex.insert(block, index); lowered_order.push(block); lowered_succ_ranges.push(succ_range); @@ -416,7 +435,7 @@ impl BlockLoweringOrder { let mut orig_map = SecondaryMap::with_default(None); for (i, lb) in lowered_order.iter().enumerate() { - let i = i as BlockIndex; + let i = BlockIndex::new(i); if let Some(b) = lb.orig_block() { orig_map[b] = Some(i); } @@ -441,7 +460,7 @@ impl BlockLoweringOrder { /// Get the successor indices for a lowered block. 
pub fn succ_indices(&self, block: BlockIndex) -> &[(Inst, BlockIndex)] { - let range = self.lowered_succ_ranges[block as usize]; + let range = self.lowered_succ_ranges[block.index()]; &self.lowered_succ_indices[range.0..range.1] } diff --git a/cranelift/codegen/src/machinst/buffer.rs b/cranelift/codegen/src/machinst/buffer.rs index ffc96c1abb..7802b44e72 100644 --- a/cranelift/codegen/src/machinst/buffer.rs +++ b/cranelift/codegen/src/machinst/buffer.rs @@ -269,7 +269,7 @@ impl MachLabel { /// Get a label for a block. (The first N MachLabels are always reserved for /// the N blocks in the vcode.) pub fn from_block(bindex: BlockIndex) -> MachLabel { - MachLabel(bindex) + MachLabel(bindex.index() as u32) } /// Get the numeric label index. @@ -334,7 +334,7 @@ impl<I: VCodeInst> MachBuffer<I> { /// times, e.g. after calling `add_{cond,uncond}_branch()` and /// before emitting branch bytes. fn check_label_branch_invariants(&self) { - if !cfg!(debug_assertions) || cfg!(fuzzing) { + if !cfg!(fuzzing) { return; } let cur_off = self.cur_offset(); @@ -489,12 +489,11 @@ impl<I: VCodeInst> MachBuffer<I> { } /// Reserve the first N MachLabels for blocks. - pub fn reserve_labels_for_blocks(&mut self, blocks: BlockIndex) { + pub fn reserve_labels_for_blocks(&mut self, blocks: usize) { trace!("MachBuffer: first {} labels are for blocks", blocks); debug_assert!(self.label_offsets.is_empty()); - self.label_offsets - .resize(blocks as usize, UNKNOWN_LABEL_OFFSET); - self.label_aliases.resize(blocks as usize, UNKNOWN_LABEL); + self.label_offsets.resize(blocks, UNKNOWN_LABEL_OFFSET); + self.label_aliases.resize(blocks, UNKNOWN_LABEL); // Post-invariant: as for `get_label()`. } @@ -1599,14 +1598,14 @@ impl MachBranch { /// resolving labels internally in the buffer. pub struct MachTextSectionBuilder<I: VCodeInst> { buf: MachBuffer<I>, - next_func: u32, + next_func: usize, force_veneers: bool, } impl<I: VCodeInst> MachTextSectionBuilder<I> { pub fn new(num_funcs: u32) -> MachTextSectionBuilder<I> { let mut buf = MachBuffer::new(); - buf.reserve_labels_for_blocks(num_funcs); + buf.reserve_labels_for_blocks(num_funcs as usize); MachTextSectionBuilder { buf, next_func: 0, @@ -1627,7 +1626,8 @@ impl<I: VCodeInst> TextSectionBuilder for MachTextSectionBuilder<I> { self.buf.align_to(align.unwrap_or(I::LabelUse::ALIGN)); let pos = self.buf.cur_offset(); if named { - self.buf.bind_label(MachLabel::from_block(self.next_func)); + self.buf + .bind_label(MachLabel::from_block(BlockIndex::new(self.next_func))); self.next_func += 1; } self.buf.put_data(func); @@ -1635,7 +1635,7 @@ impl<I: VCodeInst> TextSectionBuilder for MachTextSectionBuilder<I> { } fn resolve_reloc(&mut self, offset: u64, reloc: Reloc, addend: Addend, target: u32) -> bool { - let label = MachLabel::from_block(target); + let label = MachLabel::from_block(BlockIndex::new(target as usize)); let offset = u32::try_from(offset).unwrap(); match I::LabelUse::from_reloc(reloc, addend) { Some(label_use) => { @@ -1652,7 +1652,7 @@ impl<I: VCodeInst> TextSectionBuilder for MachTextSectionBuilder<I> { fn finish(&mut self) -> Vec<u8> { // Double-check all functions were pushed. - assert_eq!(self.next_func, self.buf.label_offsets.len() as u32); + assert_eq!(self.next_func, self.buf.label_offsets.len()); // Finish up any veneers, if necessary.
self.buf @@ -1675,7 +1675,7 @@ mod test { use std::vec::Vec; fn label(n: u32) -> MachLabel { - MachLabel::from_block(n) + MachLabel::from_block(BlockIndex::new(n as usize)) } fn target(n: u32) -> BranchTarget { BranchTarget::Label(label(n)) @@ -1690,7 +1690,7 @@ mod test { buf.reserve_labels_for_blocks(2); buf.bind_label(label(0)); let inst = Inst::Jump { dest: target(1) }; - inst.emit(&mut buf, &info, &mut state); + inst.emit(&[], &mut buf, &info, &mut state); buf.bind_label(label(1)); let buf = buf.finish(); assert_eq!(0, buf.total_size()); @@ -1710,15 +1710,15 @@ mod test { taken: target(1), not_taken: target(2), }; - inst.emit(&mut buf, &info, &mut state); + inst.emit(&[], &mut buf, &info, &mut state); buf.bind_label(label(1)); let inst = Inst::Jump { dest: target(3) }; - inst.emit(&mut buf, &info, &mut state); + inst.emit(&[], &mut buf, &info, &mut state); buf.bind_label(label(2)); let inst = Inst::Jump { dest: target(3) }; - inst.emit(&mut buf, &info, &mut state); + inst.emit(&[], &mut buf, &info, &mut state); buf.bind_label(label(3)); @@ -1740,17 +1740,17 @@ mod test { taken: target(1), not_taken: target(2), }; - inst.emit(&mut buf, &info, &mut state); + inst.emit(&[], &mut buf, &info, &mut state); buf.bind_label(label(1)); let inst = Inst::Udf { trap_code: TrapCode::Interrupt, }; - inst.emit(&mut buf, &info, &mut state); + inst.emit(&[], &mut buf, &info, &mut state); buf.bind_label(label(2)); let inst = Inst::Nop4; - inst.emit(&mut buf, &info, &mut state); + inst.emit(&[], &mut buf, &info, &mut state); buf.bind_label(label(3)); @@ -1762,9 +1762,9 @@ mod test { kind: CondBrKind::NotZero(xreg(0)), trap_code: TrapCode::Interrupt, }; - inst.emit(&mut buf2, &info, &mut state); + inst.emit(&[], &mut buf2, &info, &mut state); let inst = Inst::Nop4; - inst.emit(&mut buf2, &info, &mut state); + inst.emit(&[], &mut buf2, &info, &mut state); let buf2 = buf2.finish(); @@ -1785,7 +1785,7 @@ mod test { taken: target(2), not_taken: target(3), }; - inst.emit(&mut buf, &info, &mut state); + inst.emit(&[], &mut buf, &info, &mut state); buf.bind_label(label(1)); while buf.cur_offset() < 2000000 { @@ -1793,16 +1793,16 @@ mod test { buf.emit_island(0); } let inst = Inst::Nop4; - inst.emit(&mut buf, &info, &mut state); + inst.emit(&[], &mut buf, &info, &mut state); } buf.bind_label(label(2)); let inst = Inst::Nop4; - inst.emit(&mut buf, &info, &mut state); + inst.emit(&[], &mut buf, &info, &mut state); buf.bind_label(label(3)); let inst = Inst::Nop4; - inst.emit(&mut buf, &info, &mut state); + inst.emit(&[], &mut buf, &info, &mut state); let buf = buf.finish(); @@ -1831,7 +1831,7 @@ mod test { // go directly to the target. 
not_taken: BranchTarget::ResolvedOffset(2000000 + 4 - 4), }; - inst.emit(&mut buf2, &info, &mut state); + inst.emit(&[], &mut buf2, &info, &mut state); let buf2 = buf2.finish(); @@ -1848,16 +1848,16 @@ mod test { buf.bind_label(label(0)); let inst = Inst::Nop4; - inst.emit(&mut buf, &info, &mut state); + inst.emit(&[], &mut buf, &info, &mut state); buf.bind_label(label(1)); let inst = Inst::Nop4; - inst.emit(&mut buf, &info, &mut state); + inst.emit(&[], &mut buf, &info, &mut state); buf.bind_label(label(2)); while buf.cur_offset() < 2000000 { let inst = Inst::Nop4; - inst.emit(&mut buf, &info, &mut state); + inst.emit(&[], &mut buf, &info, &mut state); } buf.bind_label(label(3)); @@ -1866,7 +1866,7 @@ mod test { taken: target(0), not_taken: target(1), }; - inst.emit(&mut buf, &info, &mut state); + inst.emit(&[], &mut buf, &info, &mut state); let buf = buf.finish(); @@ -1879,11 +1879,11 @@ mod test { taken: BranchTarget::ResolvedOffset(8), not_taken: BranchTarget::ResolvedOffset(4 - (2000000 + 4)), }; - inst.emit(&mut buf2, &info, &mut state); + inst.emit(&[], &mut buf2, &info, &mut state); let inst = Inst::Jump { dest: BranchTarget::ResolvedOffset(-(2000000 + 8)), }; - inst.emit(&mut buf2, &info, &mut state); + inst.emit(&[], &mut buf2, &info, &mut state); let buf2 = buf2.finish(); @@ -1937,38 +1937,38 @@ mod test { taken: target(1), not_taken: target(2), }; - inst.emit(&mut buf, &info, &mut state); + inst.emit(&[], &mut buf, &info, &mut state); buf.bind_label(label(1)); let inst = Inst::Jump { dest: target(3) }; - inst.emit(&mut buf, &info, &mut state); + inst.emit(&[], &mut buf, &info, &mut state); buf.bind_label(label(2)); let inst = Inst::Nop4; - inst.emit(&mut buf, &info, &mut state); - inst.emit(&mut buf, &info, &mut state); + inst.emit(&[], &mut buf, &info, &mut state); + inst.emit(&[], &mut buf, &info, &mut state); let inst = Inst::Jump { dest: target(0) }; - inst.emit(&mut buf, &info, &mut state); + inst.emit(&[], &mut buf, &info, &mut state); buf.bind_label(label(3)); let inst = Inst::Jump { dest: target(4) }; - inst.emit(&mut buf, &info, &mut state); + inst.emit(&[], &mut buf, &info, &mut state); buf.bind_label(label(4)); let inst = Inst::Jump { dest: target(5) }; - inst.emit(&mut buf, &info, &mut state); + inst.emit(&[], &mut buf, &info, &mut state); buf.bind_label(label(5)); let inst = Inst::Jump { dest: target(7) }; - inst.emit(&mut buf, &info, &mut state); + inst.emit(&[], &mut buf, &info, &mut state); buf.bind_label(label(6)); let inst = Inst::Nop4; - inst.emit(&mut buf, &info, &mut state); + inst.emit(&[], &mut buf, &info, &mut state); buf.bind_label(label(7)); - let inst = Inst::Ret; - inst.emit(&mut buf, &info, &mut state); + let inst = Inst::Ret { rets: vec![] }; + inst.emit(&[], &mut buf, &info, &mut state); let buf = buf.finish(); @@ -2009,23 +2009,23 @@ mod test { buf.bind_label(label(0)); let inst = Inst::Jump { dest: target(1) }; - inst.emit(&mut buf, &info, &mut state); + inst.emit(&[], &mut buf, &info, &mut state); buf.bind_label(label(1)); let inst = Inst::Jump { dest: target(2) }; - inst.emit(&mut buf, &info, &mut state); + inst.emit(&[], &mut buf, &info, &mut state); buf.bind_label(label(2)); let inst = Inst::Jump { dest: target(3) }; - inst.emit(&mut buf, &info, &mut state); + inst.emit(&[], &mut buf, &info, &mut state); buf.bind_label(label(3)); let inst = Inst::Jump { dest: target(4) }; - inst.emit(&mut buf, &info, &mut state); + inst.emit(&[], &mut buf, &info, &mut state); buf.bind_label(label(4)); let inst = Inst::Jump { dest: target(1) }; - 
inst.emit(&mut buf, &info, &mut state); + inst.emit(&[], &mut buf, &info, &mut state); let buf = buf.finish(); diff --git a/cranelift/codegen/src/machinst/compile.rs b/cranelift/codegen/src/machinst/compile.rs index 67593d940a..1bdfe24b52 100644 --- a/cranelift/codegen/src/machinst/compile.rs +++ b/cranelift/codegen/src/machinst/compile.rs @@ -2,12 +2,11 @@ use crate::ir::Function; use crate::isa::TargetIsa; -use crate::log::DeferredDisplay; use crate::machinst::*; -use crate::settings; use crate::timing; -use regalloc::{allocate_registers_with_opts, Algorithm, Options, PrettyPrint}; +use regalloc2::RegallocOptions; +use regalloc2::{self, MachineEnv}; /// Compile the given function down to VCode with allocated registers, ready /// for binary emission. @@ -15,100 +14,38 @@ pub fn compile( f: &Function, b: &B, abi: Box>, - reg_universe: &RealRegUniverse, + machine_env: &MachineEnv, emit_info: ::Info, -) -> CodegenResult> -where - B::MInst: PrettyPrint, -{ +) -> CodegenResult<(VCode, regalloc2::Output)> { // Compute lowered block order. let block_order = BlockLoweringOrder::new(f); // Build the lowering context. let lower = Lower::new(f, abi, emit_info, block_order)?; // Lower the IR. - let (mut vcode, stack_map_request_info) = { + let vcode = { let _tt = timing::vcode_lower(); lower.lower(b)? }; - // Creating the vcode string representation may be costly for large functions, so defer its - // rendering. - log::trace!( - "vcode from lowering: \n{}", - DeferredDisplay::new(|| vcode.show_rru(Some(reg_universe))) - ); + log::trace!("vcode from lowering: \n{:?}", vcode); // Perform register allocation. - let (run_checker, algorithm) = match vcode.flags().regalloc() { - settings::Regalloc::Backtracking => (false, Algorithm::Backtracking(Default::default())), - settings::Regalloc::BacktrackingChecked => { - (true, Algorithm::Backtracking(Default::default())) - } - settings::Regalloc::ExperimentalLinearScan => { - (false, Algorithm::LinearScan(Default::default())) - } - settings::Regalloc::ExperimentalLinearScanChecked => { - (true, Algorithm::LinearScan(Default::default())) - } - }; - - #[cfg(feature = "regalloc-snapshot")] - { - use std::fs; - use std::path::Path; - if let Some(path) = std::env::var("SERIALIZE_REGALLOC").ok() { - let snapshot = regalloc::IRSnapshot::from_function(&vcode, reg_universe); - let serialized = bincode::serialize(&snapshot).expect("couldn't serialize snapshot"); - - let file_path = Path::new(&path).join(Path::new(&format!("ir{}.bin", f.name))); - fs::write(file_path, &serialized).expect("couldn't write IR snapshot file"); - } - } - - // If either there are no reference-typed values, or else there are - // but there are no safepoints at which we need to know about them, - // then we don't need stack maps. 
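// The algorithm selection, snapshotting, and stack-map request plumbing
// deleted in this hunk all collapse into one call; a sketch of the
// replacement as it appears below (error logging elided):
//
//     let mut options = RegallocOptions::default();
//     options.verbose_log = log::log_enabled!(log::Level::Trace);
//     let regalloc_result = regalloc2::run(&vcode, machine_env, &options)
//         .expect("register allocation");
//
// No separate stack-map request step remains: regalloc2 computes safepoint
// information from reference-typed vregs on its own.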
- let sri = if stack_map_request_info.reftyped_vregs.len() > 0 - && stack_map_request_info.safepoint_insns.len() > 0 - { - Some(&stack_map_request_info) - } else { - None - }; - - let result = { + let regalloc_result = { let _tt = timing::regalloc(); - allocate_registers_with_opts( - &mut vcode, - reg_universe, - sri, - Options { - run_checker, - algorithm, - }, - ) - .map_err(|err| { - log::error!( - "Register allocation error for vcode\n{}\nError: {:?}", - vcode.show_rru(Some(reg_universe)), + let mut options = RegallocOptions::default(); + options.verbose_log = log::log_enabled!(log::Level::Trace); + regalloc2::run(&vcode, machine_env, &options) + .map_err(|err| { + log::error!( + "Register allocation error for vcode\n{:?}\nError: {:?}\nCLIF for error:\n{:?}", + vcode, + err, + f, + ); err - ); - err - }) - .expect("register allocation") + }) + .expect("register allocation") }; - // Reorder vcode into final order and copy out final instruction sequence - // all at once. This also inserts prologues/epilogues. - { - let _tt = timing::vcode_post_ra(); - vcode.replace_insns_from_regalloc(result); - } - - log::trace!( - "vcode after regalloc: final version:\n{}", - DeferredDisplay::new(|| vcode.show_rru(Some(reg_universe))) - ); - - Ok(vcode) + Ok((vcode, regalloc_result)) } diff --git a/cranelift/codegen/src/machinst/debug.rs b/cranelift/codegen/src/machinst/debug.rs deleted file mode 100644 index e2eafe50b4..0000000000 --- a/cranelift/codegen/src/machinst/debug.rs +++ /dev/null @@ -1,525 +0,0 @@ -//! Debug info analysis: computes value-label ranges from value-label markers in -//! generated VCode. -//! -//! We "reverse-engineer" debug info like this because it is far more reliable -//! than generating it while emitting code and keeping it in sync. -//! -//! This works by (i) observing "value-label marker" instructions, which are -//! semantically just an assignment from a register to a "value label" (which -//! one can think of as another register; they represent, e.g., Wasm locals) at -//! a certain point in the code, and (ii) observing loads and stores to the -//! stack and register moves. -//! -//! We track, at every program point, the correspondence between each value -//! label and *all* locations in which it resides. E.g., if it is stored to the -//! stack, we remember that it is in both a register and the stack slot; but if -//! the register is later overwritten, then we have it just in the stack slot. -//! This allows us to avoid false-positives observing loads/stores that we think -//! are spillslots but really aren't. -//! -//! We do a standard forward dataflow analysis to compute this info. - -use crate::ir::ValueLabel; -use crate::machinst::*; -use crate::value_label::{LabelValueLoc, ValueLabelsRanges, ValueLocRange}; -use log::trace; -use regalloc::{Reg, RegUsageCollector}; -use std::collections::{HashMap, HashSet}; -use std::hash::Hash; - -/// Location of a labeled value: in a register or in a stack slot. Note that a -/// value may live in more than one location; `AnalysisInfo` maps each -/// value-label to multiple `ValueLoc`s. -#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)] -enum ValueLoc { - Reg(Reg), - /// Nominal-SP offset. 
- Stack(i64), -} - -impl From for LabelValueLoc { - fn from(v: ValueLoc) -> Self { - match v { - ValueLoc::Reg(r) => LabelValueLoc::Reg(r), - ValueLoc::Stack(off) => LabelValueLoc::SPOffset(off), - } - } -} - -impl ValueLoc { - fn is_reg(self) -> bool { - match self { - ValueLoc::Reg(_) => true, - _ => false, - } - } - fn is_stack(self) -> bool { - match self { - ValueLoc::Stack(_) => true, - _ => false, - } - } -} - -/// Mappings at one program point. -#[derive(Clone, Debug)] -struct AnalysisInfo { - /// Nominal SP relative to real SP. If `None`, then the offset is - /// indeterminate (i.e., we merged to the lattice 'bottom' element). This - /// should not happen in well-formed code. - nominal_sp_offset: Option, - /// Forward map from labeled values to sets of locations. - label_to_locs: HashMap>, - /// Reverse map for each register indicating the value it holds, if any. - reg_to_label: HashMap, - /// Reverse map for each stack offset indicating the value it holds, if any. - stack_to_label: HashMap, -} - -/// Get the registers written (mod'd or def'd) by a machine instruction. -fn get_inst_writes(m: &M) -> Vec { - // TODO: expose this part of regalloc.rs's interface publicly. - let mut vecs = RegUsageCollector::get_empty_reg_vecs_test_framework_only(false); - let mut coll = RegUsageCollector::new(&mut vecs); - m.get_regs(&mut coll); - vecs.defs.extend(vecs.mods.into_iter()); - vecs.defs -} - -impl AnalysisInfo { - /// Create a new analysis state. This is the "top" lattice element at which - /// the fixpoint dataflow analysis starts. - fn new() -> Self { - AnalysisInfo { - nominal_sp_offset: Some(0), - label_to_locs: HashMap::new(), - reg_to_label: HashMap::new(), - stack_to_label: HashMap::new(), - } - } - - /// Remove all locations for a given labeled value. Used when the labeled - /// value is redefined (so old values become stale). - fn clear_label(&mut self, label: ValueLabel) { - if let Some(locs) = self.label_to_locs.remove(&label) { - for loc in locs { - match loc { - ValueLoc::Reg(r) => { - self.reg_to_label.remove(&r); - } - ValueLoc::Stack(off) => { - self.stack_to_label.remove(&off); - } - } - } - } - } - - /// Remove a label from a register, if any. Used, e.g., if the register is - /// overwritten. - fn clear_reg(&mut self, reg: Reg) { - if let Some(label) = self.reg_to_label.remove(®) { - if let Some(locs) = self.label_to_locs.get_mut(&label) { - locs.remove(&ValueLoc::Reg(reg)); - } - } - } - - /// Remove a label from a stack offset, if any. Used, e.g., when the stack - /// slot is overwritten. - fn clear_stack_off(&mut self, off: i64) { - if let Some(label) = self.stack_to_label.remove(&off) { - if let Some(locs) = self.label_to_locs.get_mut(&label) { - locs.remove(&ValueLoc::Stack(off)); - } - } - } - - /// Indicate that a labeled value is newly defined and its new value is in - /// `reg`. - fn def_label_at_reg(&mut self, label: ValueLabel, reg: Reg) { - self.clear_label(label); - self.label_to_locs - .entry(label) - .or_insert_with(|| HashSet::new()) - .insert(ValueLoc::Reg(reg)); - self.reg_to_label.insert(reg, label); - } - - /// Process a store from a register to a stack slot (offset). - fn store_reg(&mut self, reg: Reg, off: i64) { - self.clear_stack_off(off); - if let Some(label) = self.reg_to_label.get(®) { - if let Some(locs) = self.label_to_locs.get_mut(label) { - locs.insert(ValueLoc::Stack(off)); - } - self.stack_to_label.insert(off, *label); - } - } - - /// Process a load from a stack slot (offset) to a register. 
- fn load_reg(&mut self, reg: Reg, off: i64) { - self.clear_reg(reg); - if let Some(&label) = self.stack_to_label.get(&off) { - if let Some(locs) = self.label_to_locs.get_mut(&label) { - locs.insert(ValueLoc::Reg(reg)); - } - self.reg_to_label.insert(reg, label); - } - } - - /// Process a move from one register to another. - fn move_reg(&mut self, to: Reg, from: Reg) { - self.clear_reg(to); - if let Some(&label) = self.reg_to_label.get(&from) { - if let Some(locs) = self.label_to_locs.get_mut(&label) { - locs.insert(ValueLoc::Reg(to)); - } - self.reg_to_label.insert(to, label); - } - } - - /// Update the analysis state w.r.t. an instruction's effects. Given the - /// state just before `inst`, this method updates `self` to be the state - /// just after `inst`. - fn step(&mut self, inst: &M) { - for write in get_inst_writes(inst) { - self.clear_reg(write); - } - if let Some((label, reg)) = inst.defines_value_label() { - self.def_label_at_reg(label, reg); - } - match inst.stack_op_info() { - Some(MachInstStackOpInfo::LoadNomSPOff(reg, offset)) => { - self.load_reg(reg, offset + self.nominal_sp_offset.unwrap()); - } - Some(MachInstStackOpInfo::StoreNomSPOff(reg, offset)) => { - self.store_reg(reg, offset + self.nominal_sp_offset.unwrap()); - } - Some(MachInstStackOpInfo::NomSPAdj(offset)) => { - if self.nominal_sp_offset.is_some() { - self.nominal_sp_offset = Some(self.nominal_sp_offset.unwrap() + offset); - } - } - _ => {} - } - if let Some((to, from)) = inst.is_move() { - let to = to.to_reg(); - self.move_reg(to, from); - } - } -} - -/// Trait used to implement the dataflow analysis' meet (intersect) function -/// onthe `AnalysisInfo` components. For efficiency, this is implemented as a -/// mutation on the LHS, rather than a pure functional operation. -trait IntersectFrom { - fn intersect_from(&mut self, other: &Self) -> IntersectResult; -} - -/// Result of an intersection operation. Indicates whether the mutated LHS -/// (which becomes the intersection result) differs from the original LHS. Also -/// indicates if the value has become "empty" and should be removed from a -/// parent container, if any. -struct IntersectResult { - /// Did the intersection change the LHS input (the one that was mutated into - /// the result)? This is needed to drive the fixpoint loop; when no more - /// changes occur, then we have converted. - changed: bool, - /// Is the resulting value "empty"? This can be used when a container, such - /// as a map, holds values of this (intersection result) type; when - /// `is_empty` is true for the merge of the values at a particular key, we - /// can remove that key from the merged (intersected) result. This is not - /// necessary for analysis correctness but reduces the memory and runtime - /// cost of the fixpoint loop. 
- is_empty: bool, -} - -impl IntersectFrom for AnalysisInfo { - fn intersect_from(&mut self, other: &Self) -> IntersectResult { - let mut changed = false; - changed |= self - .nominal_sp_offset - .intersect_from(&other.nominal_sp_offset) - .changed; - changed |= self - .label_to_locs - .intersect_from(&other.label_to_locs) - .changed; - changed |= self - .reg_to_label - .intersect_from(&other.reg_to_label) - .changed; - changed |= self - .stack_to_label - .intersect_from(&other.stack_to_label) - .changed; - IntersectResult { - changed, - is_empty: false, - } - } -} - -impl IntersectFrom for HashMap -where - K: Copy + Eq + Hash, - V: IntersectFrom, -{ - /// Intersection for hashmap: remove keys that are not in both inputs; - /// recursively intersect values for keys in common. - fn intersect_from(&mut self, other: &Self) -> IntersectResult { - let mut changed = false; - let mut remove_keys = vec![]; - for k in self.keys() { - if !other.contains_key(k) { - remove_keys.push(*k); - } - } - for k in &remove_keys { - changed = true; - self.remove(k); - } - - remove_keys.clear(); - for k in other.keys() { - if let Some(v) = self.get_mut(k) { - let result = v.intersect_from(other.get(k).unwrap()); - changed |= result.changed; - if result.is_empty { - remove_keys.push(*k); - } - } - } - for k in &remove_keys { - changed = true; - self.remove(k); - } - - IntersectResult { - changed, - is_empty: self.len() == 0, - } - } -} -impl IntersectFrom for HashSet -where - T: Copy + Eq + Hash, -{ - /// Intersection for hashset: just take the set intersection. - fn intersect_from(&mut self, other: &Self) -> IntersectResult { - let mut changed = false; - let mut remove = vec![]; - for val in self.iter() { - if !other.contains(val) { - remove.push(*val); - } - } - for val in remove { - changed = true; - self.remove(&val); - } - - IntersectResult { - changed, - is_empty: self.len() == 0, - } - } -} -impl IntersectFrom for ValueLabel { - // Intersection for labeled value: remove if not equal. This is equivalent - // to a three-level lattice with top, bottom, and unordered set of - // individual labels in between. - fn intersect_from(&mut self, other: &Self) -> IntersectResult { - IntersectResult { - changed: false, - is_empty: *self != *other, - } - } -} -impl IntersectFrom for Option -where - T: Copy + Eq, -{ - /// Intersectino for Option: recursively intersect if both `Some`, else - /// `None`. - fn intersect_from(&mut self, other: &Self) -> IntersectResult { - let mut changed = false; - if !(self.is_some() && other.is_some() && self == other) { - changed = true; - *self = None; - } - IntersectResult { - changed, - is_empty: self.is_none(), - } - } -} - -/// Compute the value-label ranges (locations for program-point ranges for -/// labeled values) from a given `VCode` compilation result. -/// -/// In order to compute this information, we perform a dataflow analysis on the -/// machine code. To do so, and translate the results into a form usable by the -/// debug-info consumers, we need to know two additional things: -/// -/// - The machine-code layout (code offsets) of the instructions. DWARF is -/// encoded in terms of instruction *ends* (and we reason about value -/// locations at program points *after* instructions, to match this), so we -/// take an array `inst_ends`, giving us code offsets for each instruction's -/// end-point. (Note that this is one *past* the last byte; so a 4-byte -/// instruction at offset 0 has an end offset of 4.) -/// -/// - The locations of the labels to which branches will jump. 
-///   Branches can tell us about their targets in terms of `MachLabel`s, but
-///   we don't know where those `MachLabel`s will be placed in the linear
-///   array of instructions. We take the array `label_insn_index` to provide
-///   this info: for a label with index `l`, `label_insn_index[l]` is the
-///   index of the instruction before which that label is bound.
-pub(crate) fn compute<I: VCodeInst>(
-    insts: &[I],
-    layout_info: &InstsLayoutInfo,
-) -> ValueLabelsRanges {
-    let inst_start = |idx: usize| {
-        if idx == 0 {
-            0
-        } else {
-            layout_info.inst_end_offsets[idx - 1]
-        }
-    };
-
-    trace!("compute: insts =");
-    for i in 0..insts.len() {
-        trace!(
-            "  #{} end: {} -> {:?}",
-            i,
-            layout_info.inst_end_offsets[i],
-            insts[i]
-        );
-    }
-    trace!("label_insn_index: {:?}", layout_info.label_inst_indices);
-
-    // Info at each block head, indexed by label.
-    let mut block_starts: HashMap<u32, AnalysisInfo> = HashMap::new();
-
-    // Initialize state at entry.
-    block_starts.insert(0, AnalysisInfo::new());
-
-    // Worklist: label indices for basic blocks.
-    let mut worklist = Vec::new();
-    let mut worklist_set = HashSet::new();
-    worklist.push(0);
-    worklist_set.insert(0);
-
-    while !worklist.is_empty() {
-        let block = worklist.pop().unwrap();
-        worklist_set.remove(&block);
-
-        let mut state = block_starts.get(&block).unwrap().clone();
-        trace!("at block {} -> state: {:?}", block, state);
-        // Iterate for each instruction in the block (we break at the first
-        // terminator we see).
-        let mut index = layout_info.label_inst_indices[block as usize];
-        while index < insts.len() as u32 {
-            state.step(&insts[index as usize]);
-            trace!(" -> inst #{}: {:?}", index, insts[index as usize]);
-            trace!("    --> state: {:?}", state);
-
-            let term = insts[index as usize].is_term();
-            if term.is_term() {
-                for succ in term.get_succs() {
-                    trace!("    SUCCESSOR block {}", succ.get());
-                    if let Some(succ_state) = block_starts.get_mut(&succ.get()) {
-                        trace!("     orig state: {:?}", succ_state);
-                        if succ_state.intersect_from(&state).changed {
-                            if worklist_set.insert(succ.get()) {
-                                worklist.push(succ.get());
-                            }
-                            trace!("     (changed)");
-                        }
-                        trace!("     new state: {:?}", succ_state);
-                    } else {
-                        // First time seeing this block
-                        block_starts.insert(succ.get(), state.clone());
-                        worklist.push(succ.get());
-                        worklist_set.insert(succ.get());
-                    }
-                }
-                break;
-            }
-
-            index += 1;
-        }
-    }
-
-    // Now iterate over blocks one last time, collecting
-    // value-label locations.
-
-    let mut value_labels_ranges: ValueLabelsRanges = HashMap::new();
-    for block in 0..layout_info.label_inst_indices.len() {
-        let start_index = layout_info.label_inst_indices[block];
-        let end_index = if block == layout_info.label_inst_indices.len() - 1 {
-            insts.len() as u32
-        } else {
-            layout_info.label_inst_indices[block + 1]
-        };
-        let block = block as u32;
-        let mut state = block_starts.get(&block).unwrap().clone();
-        for index in start_index..end_index {
-            let offset = inst_start(index as usize);
-            let end = layout_info.inst_end_offsets[index as usize];
-
-            // Cold blocks cause instructions to occur out-of-order wrt
-            // others. We rely on the monotonic mapping from instruction
-            // index to offset in machine code for this analysis to work,
-            // so we just skip debuginfo for cold blocks. This should be
-            // generally fine, as cold blocks mostly constitute slowpaths
-            // for expansions of particular ops, rather than user-written
-            // code.
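// ---------------------------------------------------------------------
// Illustrative sketch (editorial aside, not part of this patch): the
// dataflow pattern that `compute()` above implements. Block-entry state
// is the meet (intersection) of all predecessor-exit states -- the role
// `IntersectFrom` plays above -- and blocks are revisited until nothing
// changes. All names here are hypothetical and simplified; block 0 is
// assumed to be the entry.

use std::collections::HashSet;

type State = HashSet<u32>;

/// Meet (set intersection) in place; returns whether `lhs` changed.
fn meet(lhs: &mut State, rhs: &State) -> bool {
    let before = lhs.len();
    lhs.retain(|v| rhs.contains(v));
    lhs.len() != before
}

/// Worklist fixpoint over a CFG given as per-block successor lists.
/// A real analysis would also apply a per-instruction transfer function
/// before propagating to successors; this sketch propagates unchanged.
fn fixpoint(succs: &[Vec<usize>], entry: State) -> Vec<Option<State>> {
    let mut states: Vec<Option<State>> = vec![None; succs.len()];
    states[0] = Some(entry);
    let mut worklist = vec![0];
    while let Some(b) = worklist.pop() {
        let out = states[b].clone().unwrap();
        for &s in &succs[b] {
            match &mut states[s] {
                Some(st) => {
                    if meet(st, &out) {
                        worklist.push(s); // state shrank: revisit
                    }
                }
                slot => {
                    *slot = Some(out.clone()); // first visit
                    worklist.push(s);
                }
            }
        }
    }
    states
}
// ---------------------------------------------------------------------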
- if layout_info.start_of_cold_code.is_some() - && offset >= layout_info.start_of_cold_code.unwrap() - { - continue; - } - - assert!(offset <= end); - state.step(&insts[index as usize]); - - for (label, locs) in &state.label_to_locs { - trace!(" inst {} has label {:?} -> locs {:?}", index, label, locs); - // Find an appropriate loc: a register if possible, otherwise pick the first stack - // loc. - let reg = locs.iter().cloned().find(|l| l.is_reg()); - let loc = reg.or_else(|| locs.iter().cloned().find(|l| l.is_stack())); - if let Some(loc) = loc { - let loc = LabelValueLoc::from(loc); - let list = value_labels_ranges.entry(*label).or_insert_with(|| vec![]); - // If the existing location list for this value-label is - // either empty, or has an end location that does not extend - // to the current offset, then we have to append a new - // entry. Otherwise, we can extend the current entry. - // - // Note that `end` is one past the end of the instruction; - // it appears that `end` is exclusive, so a mapping valid at - // offset 5 will have start = 5, end = 6. - if list - .last() - .map(|last| last.end <= offset || last.loc != loc) - .unwrap_or(true) - { - list.push(ValueLocRange { - loc, - start: end, - end: end + 1, - }); - } else { - list.last_mut().unwrap().end = end + 1; - } - } - } - } - } - - trace!("ret: {:?}", value_labels_ranges); - value_labels_ranges -} diff --git a/cranelift/codegen/src/machinst/helpers.rs b/cranelift/codegen/src/machinst/helpers.rs index 40139d61ee..30446b04ab 100644 --- a/cranelift/codegen/src/machinst/helpers.rs +++ b/cranelift/codegen/src/machinst/helpers.rs @@ -1,8 +1,8 @@ //! Miscellaneous helpers for machine backends. use super::{InsnOutput, LowerCtx, VCodeInst, ValueRegs}; +use super::{Reg, Writable}; use crate::ir::Type; -use regalloc::{Reg, Writable}; use std::ops::{Add, BitAnd, Not, Sub}; /// Returns the size (in bits) of a given type. diff --git a/cranelift/codegen/src/machinst/isle.rs b/cranelift/codegen/src/machinst/isle.rs index 052ed86e49..f6f8657a13 100644 --- a/cranelift/codegen/src/machinst/isle.rs +++ b/cranelift/codegen/src/machinst/isle.rs @@ -1,8 +1,7 @@ use crate::ir::{types, Inst, Value, ValueList}; -use crate::machinst::{get_output_reg, InsnOutput, LowerCtx, MachInst, RegRenamer}; +use crate::machinst::{get_output_reg, InsnOutput, LowerCtx, Reg, Writable}; use alloc::boxed::Box; use alloc::vec::Vec; -use regalloc::{Reg, Writable}; use smallvec::SmallVec; use std::cell::Cell; @@ -107,7 +106,8 @@ macro_rules! isle_prelude_methods { #[inline] fn invalid_reg(&mut self) -> Reg { - Reg::invalid() + use crate::machinst::valueregs::InvalidSentinel; + Reg::invalid_sentinel() } #[inline] @@ -467,7 +467,6 @@ where pub lower_ctx: &'a mut C, pub flags: &'a F, pub isa_flags: &'a I, - pub emitted_insts: SmallVec<[(C::I, bool); N]>, } /// Shared lowering code amongst all backends for doing ISLE-based lowering. @@ -482,7 +481,6 @@ pub(crate) fn lower_common( outputs: &[InsnOutput], inst: Inst, isle_lower: IF, - map_regs: fn(&mut C::I, &RegRenamer), ) -> Result<(), ()> where C: LowerCtx, @@ -495,7 +493,6 @@ where lower_ctx, flags, isa_flags, - emitted_insts: SmallVec::new(), }; let temp_regs = isle_lower(&mut isle_ctx, inst).ok_or(())?; @@ -514,10 +511,15 @@ where } // The ISLE generated code emits its own registers to define the - // instruction's lowered values in. We rename those registers to the - // registers they were assigned when their value was used as an operand in - // earlier lowerings. 
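// ---------------------------------------------------------------------
// Illustrative sketch (editorial aside, not part of this patch): the
// range-coalescing rule used when collecting value-label locations
// above. Ranges are half-open [start, end): a location valid at code
// offset 5 is stored as start = 5, end = 6. A new observation either
// extends the previous range (same location, contiguous) or opens a
// fresh one. Types here are hypothetical simplifications.

#[derive(Clone, Copy, PartialEq, Eq, Debug)]
struct LocRange {
    loc: u32,
    start: u32,
    end: u32,
}

fn record(list: &mut Vec<LocRange>, loc: u32, offset: u32, end: u32) {
    match list.last_mut() {
        // Same location and the previous range reaches this offset:
        // just extend it.
        Some(last) if last.loc == loc && last.end > offset => last.end = end,
        // Otherwise start a new range.
        _ => list.push(LocRange { loc, start: offset, end }),
    }
}
// ---------------------------------------------------------------------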
-    let mut renamer = RegRenamer::default();
+    // instruction's lowered values in. However, other instructions
+    // that use this SSA value will be lowered assuming that the value
+    // is generated into a pre-assigned, different, register.
+    //
+    // To connect the two, we set up "aliases" in the VCodeBuilder
+    // that apply when it is building the Operand table for the
+    // regalloc to use. These aliases effectively rewrite any use of
+    // the pre-assigned register to the register that was returned by
+    // the ISLE lowering logic.
     for i in 0..outputs.len() {
         let regs = temp_regs[i];
         let dsts = get_output_reg(isle_ctx.lower_ctx, outputs[i]);
@@ -528,41 +530,11 @@ where
             // Flags values do not occupy any registers.
             assert!(regs.len() == 0);
         } else {
-            let (_, tys) = <C::I as MachInst>::rc_for_type(ty).unwrap();
-            assert!(regs.len() == tys.len());
-            assert!(regs.len() == dsts.len());
-            for ((dst, temp), ty) in dsts.regs().iter().zip(regs.regs().iter()).zip(tys) {
-                renamer.add_rename(*temp, dst.to_reg(), *ty);
+            for (dst, temp) in dsts.regs().iter().zip(regs.regs().iter()) {
+                isle_ctx.lower_ctx.set_vreg_alias(dst.to_reg(), *temp);
             }
         }
     }
-    for (inst, _) in isle_ctx.emitted_insts.iter_mut() {
-        map_regs(inst, &renamer);
-    }
-
-    // If any renamed register wasn't actually defined in the ISLE-generated
-    // instructions then what we're actually doing is "renaming" an input to a
-    // new name which requires manually inserting a `mov` instruction. Note that
-    // this typically doesn't happen and is only here for cases where the input
-    // is sometimes passed through unmodified to the output, such as
-    // zero-extending a 64-bit input to a 128-bit output which doesn't actually
-    // change the input and simply produces another zero'd register.
-    for (old, new, ty) in renamer.unmapped_defs() {
-        isle_ctx
-            .lower_ctx
-            .emit(<C::I as MachInst>::gen_move(Writable::from_reg(new), old, ty));
-    }
-
-    // Once everything is remapped we forward all emitted instructions to the
-    // `lower_ctx`. Note that this happens after the synthetic mov's above in
-    // case any of these instructions use those movs.
-    for (inst, is_safepoint) in isle_ctx.emitted_insts {
-        if is_safepoint {
-            lower_ctx.emit_safepoint(inst);
-        } else {
-            lower_ctx.emit(inst);
-        }
-    }
     Ok(())
 }
diff --git a/cranelift/codegen/src/machinst/lower.rs b/cranelift/codegen/src/machinst/lower.rs
index 314a294d2e..6cd7bfa5e8 100644
--- a/cranelift/codegen/src/machinst/lower.rs
+++ b/cranelift/codegen/src/machinst/lower.rs
@@ -18,17 +18,19 @@ use crate::ir::{
 };
 use crate::machinst::{
     non_writable_value_regs, writable_value_regs, ABICallee, BlockIndex, BlockLoweringOrder,
-    LoweredBlock, MachLabel, VCode, VCodeBuilder, VCodeConstant, VCodeConstantData, VCodeConstants,
-    VCodeInst, ValueRegs,
+    LoweredBlock, MachLabel, Reg, VCode, VCodeBuilder, VCodeConstant, VCodeConstantData,
+    VCodeConstants, VCodeInst, ValueRegs, Writable,
 };
 use crate::CodegenResult;
 use alloc::boxed::Box;
 use alloc::vec::Vec;
 use core::convert::TryInto;
-use regalloc::{Reg, StackmapRequestInfo, Writable};
+use regalloc2::VReg;
 use smallvec::{smallvec, SmallVec};
 use std::fmt::Debug;
 
+use super::{first_user_vreg_index, VCodeBuildDirection};
+
 /// An "instruction color" partitions CLIF instructions by side-effecting ops.
 /// All instructions with the same "color" are guaranteed not to be separated by
 /// any side-effecting op (for this purpose, loads are also considered
@@ -160,8 +162,6 @@ pub trait LowerCtx {
     fn alloc_tmp(&mut self, ty: Type) -> ValueRegs<Writable<Reg>>;
     /// Emit a machine instruction.
     fn emit(&mut self, mach_inst: Self::I);
-    /// Emit a machine instruction that is a safepoint.
-    fn emit_safepoint(&mut self, mach_inst: Self::I);
     /// Indicate that the side-effect of an instruction has been sunk to the
     /// current scan location. This should only be done when the instruction's
     /// original results are not used (i.e., `put_input_in_regs` is not invoked
@@ -178,6 +178,9 @@ pub trait LowerCtx {
     /// Cause the value in `reg` to be in a virtual reg, by copying it into a new virtual reg
     /// if `reg` is a real reg. `ty` describes the type of the value in `reg`.
     fn ensure_in_vreg(&mut self, reg: Reg, ty: Type) -> Reg;
+
+    /// Note that one vreg is to be treated as an alias of another.
+    fn set_vreg_alias(&mut self, from: Reg, to: Reg);
 }
 
 /// A representation of all of the ways in which a value is available, aside
@@ -232,14 +235,6 @@ pub trait LowerBackend {
     }
 }
 
-/// A pending instruction to insert and auxiliary information about it: its source location and
-/// whether it is a safepoint.
-struct InstTuple<I: VCodeInst> {
-    loc: SourceLoc,
-    is_safepoint: bool,
-    inst: I,
-}
-
 /// Machine-independent lowering driver / machine-instruction container. Maintains a correspondence
 /// from original Inst to MachInsts.
 pub struct Lower<'func, I: VCodeInst> {
@@ -287,20 +282,10 @@ pub struct Lower<'func, I: VCodeInst> {
     inst_sunk: FxHashSet<Inst>,
 
     /// Next virtual register number to allocate.
-    next_vreg: u32,
-
-    /// Insts in reverse block order, before final copy to vcode.
-    block_insts: Vec<InstTuple<I>>,
-
-    /// Ranges in `block_insts` constituting BBs.
-    block_ranges: Vec<(usize, usize)>,
-
-    /// Instructions collected for the BB in progress, in reverse order, with
-    /// source-locs attached.
-    bb_insts: Vec<InstTuple<I>>,
+    next_vreg: usize,
 
     /// Instructions collected for the CLIF inst in progress, in forward order.
-    ir_insts: Vec<InstTuple<I>>,
+    ir_insts: Vec<I>,
 
     /// The register to use for GetPinnedReg, if any, on this architecture.
     pinned_reg: Option<Reg>,
@@ -324,22 +309,22 @@ pub enum RelocDistance {
 
 fn alloc_vregs<I: VCodeInst>(
     ty: Type,
-    next_vreg: &mut u32,
+    next_vreg: &mut usize,
     vcode: &mut VCodeBuilder<I>,
 ) -> CodegenResult<ValueRegs<Reg>> {
     let v = *next_vreg;
     let (regclasses, tys) = I::rc_for_type(ty)?;
-    *next_vreg += regclasses.len() as u32;
-    let regs = match regclasses {
-        &[rc0] => ValueRegs::one(Reg::new_virtual(rc0, v)),
-        &[rc0, rc1] => ValueRegs::two(Reg::new_virtual(rc0, v), Reg::new_virtual(rc1, v + 1)),
+    *next_vreg += regclasses.len();
+    let regs: ValueRegs<Reg> = match regclasses {
+        &[rc0] => ValueRegs::one(VReg::new(v, rc0).into()),
+        &[rc0, rc1] => ValueRegs::two(VReg::new(v, rc0).into(), VReg::new(v + 1, rc1).into()),
         // We can extend this if/when we support 32-bit targets; e.g.,
         // an i128 on a 32-bit machine will need up to four machine regs
         // for a `Value`.
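// ---------------------------------------------------------------------
// Illustrative sketch (editorial aside, not part of this patch): the
// idea behind the new `set_vreg_alias` above. Rather than editing
// already-emitted instructions, the builder records `from -> to`
// entries and chases the chain when operands are finally collected for
// the register allocator. Hypothetical, simplified (vregs as plain u32
// indices).

use std::collections::HashMap;

#[derive(Default)]
struct VRegAliases(HashMap<u32, u32>);

impl VRegAliases {
    fn set(&mut self, from: u32, to: u32) {
        debug_assert_ne!(from, to, "a self-alias would loop forever");
        self.0.insert(from, to);
    }

    /// Follow alias chains until a non-aliased vreg is reached.
    fn resolve(&self, mut v: u32) -> u32 {
        while let Some(&next) = self.0.get(&v) {
            v = next;
        }
        v
    }
}
// ---------------------------------------------------------------------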
         _ => panic!("Value must reside in 1 or 2 registers"),
     };
     for (&reg_ty, &reg) in tys.iter().zip(regs.regs().iter()) {
-        vcode.set_vreg_type(reg.to_virtual_reg(), reg_ty);
+        vcode.set_vreg_type(reg.to_virtual_reg().unwrap(), reg_ty);
     }
     Ok(regs)
 }
@@ -358,9 +343,15 @@ impl<'func, I: VCodeInst> Lower<'func, I> {
         block_order: BlockLoweringOrder,
     ) -> CodegenResult<Lower<'func, I>> {
         let constants = VCodeConstants::with_capacity(f.dfg.constants.len());
-        let mut vcode = VCodeBuilder::new(abi, emit_info, block_order, constants);
+        let mut vcode = VCodeBuilder::new(
+            abi,
+            emit_info,
+            block_order,
+            constants,
+            VCodeBuildDirection::Backward,
+        );
 
-        let mut next_vreg: u32 = 0;
+        let mut next_vreg: usize = first_user_vreg_index();
 
         let mut value_regs = SecondaryMap::with_default(ValueRegs::invalid());
 
@@ -381,10 +372,11 @@ impl<'func, I: VCodeInst> Lower<'func, I> {
                 let regs = alloc_vregs(ty, &mut next_vreg, &mut vcode)?;
                 value_regs[result] = regs;
                 log::trace!(
-                    "bb {} inst {} ({:?}): result regs {:?}",
+                    "bb {} inst {} ({:?}): result {} regs {:?}",
                     bb,
                     inst,
                     f.dfg[inst],
+                    result,
                     regs,
                 );
             }
@@ -459,9 +451,6 @@ impl<'func, I: VCodeInst> Lower<'func, I> {
             inst_sunk: FxHashSet::default(),
             cur_scan_entry_color: None,
             cur_inst: None,
-            block_insts: vec![],
-            block_ranges: vec![],
-            bb_insts: vec![],
             ir_insts: vec![],
             pinned_reg: None,
             vm_context,
@@ -475,6 +464,12 @@ impl<'func, I: VCodeInst> Lower<'func, I> {
             entry_bb,
             self.f.dfg.block_params(entry_bb)
         );
+
+        // Make the vmctx available in debuginfo.
+        if let Some(vmctx_val) = self.f.special_param(ArgumentPurpose::VMContext) {
+            self.emit_value_label_marks_for_value(vmctx_val);
+        }
+
         for (i, param) in self.f.dfg.block_params(entry_bb).iter().enumerate() {
             if !self.vcode.abi().arg_is_needed_in_body(i) {
                 continue;
@@ -509,14 +504,6 @@ impl<'func, I: VCodeInst> Lower<'func, I> {
     }
 
     fn gen_retval_setup(&mut self, gen_ret_inst: GenerateReturn) {
-        // Hack: to keep `vmctx` alive, if it exists, we emit a value label here
-        // for it if debug info is requested. This ensures that it exists either
-        // in a register or spillslot throughout the entire function body, and
-        // allows for a better debugging experience.
-        if let Some(vmctx_val) = self.f.special_param(ArgumentPurpose::VMContext) {
-            self.emit_value_label_marks_for_value(vmctx_val);
-        }
-
         let retval_regs = self.retval_regs.clone();
         for (i, regs) in retval_regs.into_iter().enumerate() {
             let regs = writable_value_regs(regs);
@@ -534,141 +521,16 @@ impl<'func, I: VCodeInst> Lower<'func, I> {
             GenerateReturn::No => self.vcode.abi().gen_epilogue_placeholder(),
         };
         self.emit(inst);
-    }
-    fn lower_edge(&mut self, pred: Block, inst: Inst, succ: Block) -> CodegenResult<()> {
-        log::trace!("lower_edge: pred {} succ {}", pred, succ);
-
-        let num_args = self.f.dfg.block_params(succ).len();
-        debug_assert!(num_args == self.f.dfg.inst_variable_args(inst).len());
-
-        // Most blocks have no params, so skip all the hoop-jumping below and make an early exit.
-        if num_args == 0 {
-            return Ok(());
-        }
-
-        self.cur_inst = Some(inst);
-
-        // Make up two vectors of info:
-        //
-        // * one for dsts which are to be assigned constants. We'll deal with those second, so
-        //   as to minimise live ranges.
-        //
-        // * one for dsts whose sources are non-constants.
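// ---------------------------------------------------------------------
// Illustrative sketch (editorial aside, not part of this patch): how
// `alloc_vregs` above maps one IR value onto consecutive vreg indices,
// starting after the pinned (physical) range that
// `first_user_vreg_index()` returns. Hypothetical, simplified.

fn alloc(next_vreg: &mut usize, num_parts: usize) -> Vec<usize> {
    let v = *next_vreg;
    *next_vreg += num_parts;
    (v..v + num_parts).collect()
}

fn main() {
    let mut next = 128; // assume 128 pinned vregs precede user vregs
    assert_eq!(alloc(&mut next, 1), vec![128]); // e.g. an i64: one reg
    assert_eq!(alloc(&mut next, 2), vec![129, 130]); // an i128: two regs
}
// ---------------------------------------------------------------------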
-
-        let mut const_bundles: SmallVec<[_; 16]> = SmallVec::new();
-        let mut var_bundles: SmallVec<[_; 16]> = SmallVec::new();
-
-        let mut i = 0;
-        for (dst_val, src_val) in self
-            .f
-            .dfg
-            .block_params(succ)
-            .iter()
-            .zip(self.f.dfg.inst_variable_args(inst).iter())
-        {
-            let src_val = self.f.dfg.resolve_aliases(*src_val);
-            let ty = self.f.dfg.value_type(src_val);
-
-            debug_assert!(ty == self.f.dfg.value_type(*dst_val));
-            let dst_regs = self.value_regs[*dst_val];
-
-            let input = self.get_value_as_source_or_const(src_val);
-            log::trace!("jump arg {} is {}", i, src_val);
-            i += 1;
-
-            if let Some(c) = input.constant {
-                log::trace!(" -> constant {}", c);
-                const_bundles.push((ty, writable_value_regs(dst_regs), c));
-            } else {
-                let src_regs = self.put_value_in_regs(src_val);
-                log::trace!(" -> reg {:?}", src_regs);
-                // Skip self-assignments. Not only are they pointless, they falsely trigger the
-                // overlap-check below and hence can cause a lot of unnecessary copying through
-                // temporaries.
-                if dst_regs != src_regs {
-                    var_bundles.push((ty, writable_value_regs(dst_regs), src_regs));
-                }
+        // Hack: generate a virtual instruction that uses vmctx in
+        // order to keep it alive for the duration of the function,
+        // for the benefit of debuginfo.
+        if self.f.dfg.values_labels.is_some() {
+            if let Some(vmctx_val) = self.f.special_param(ArgumentPurpose::VMContext) {
+                let vmctx_reg = self.value_regs[vmctx_val].only_reg().unwrap();
+                self.emit(I::gen_dummy_use(vmctx_reg));
             }
         }
-
-        // Deal first with the moves whose sources are variables.
-
-        // FIXME: use regalloc.rs' SparseSetU here. This would avoid all heap allocation
-        // for cases of up to circa 16 args. Currently not possible because regalloc.rs
-        // does not export it.
-        let mut src_reg_set = FxHashSet::<Reg>::default();
-        for (_, _, src_regs) in &var_bundles {
-            for &reg in src_regs.regs() {
-                src_reg_set.insert(reg);
-            }
-        }
-        let mut overlaps = false;
-        'outer: for (_, dst_regs, _) in &var_bundles {
-            for &reg in dst_regs.regs() {
-                if src_reg_set.contains(&reg.to_reg()) {
-                    overlaps = true;
-                    break 'outer;
-                }
-            }
-        }
-
-        // If, as is mostly the case, the source and destination register sets are non
-        // overlapping, then we can copy directly, so as to save the register allocator work.
-        if !overlaps {
-            for (ty, dst_regs, src_regs) in &var_bundles {
-                let (_, reg_tys) = I::rc_for_type(*ty)?;
-                for ((dst, src), reg_ty) in dst_regs
-                    .regs()
-                    .iter()
-                    .zip(src_regs.regs().iter())
-                    .zip(reg_tys.iter())
-                {
-                    self.emit(I::gen_move(*dst, *src, *reg_ty));
-                }
-            }
-        } else {
-            // There's some overlap, so play safe and copy via temps.
-            let mut tmp_regs = SmallVec::<[ValueRegs<Writable<Reg>>; 16]>::new();
-            for (ty, _, _) in &var_bundles {
-                tmp_regs.push(self.alloc_tmp(*ty));
-            }
-            for ((ty, _, src_reg), tmp_reg) in var_bundles.iter().zip(tmp_regs.iter()) {
-                let (_, reg_tys) = I::rc_for_type(*ty)?;
-                for ((tmp, src), reg_ty) in tmp_reg
-                    .regs()
-                    .iter()
-                    .zip(src_reg.regs().iter())
-                    .zip(reg_tys.iter())
-                {
-                    self.emit(I::gen_move(*tmp, *src, *reg_ty));
-                }
-            }
-            for ((ty, dst_reg, _), tmp_reg) in var_bundles.iter().zip(tmp_regs.iter()) {
-                let (_, reg_tys) = I::rc_for_type(*ty)?;
-                for ((dst, tmp), reg_ty) in dst_reg
-                    .regs()
-                    .iter()
-                    .zip(tmp_reg.regs().iter())
-                    .zip(reg_tys.iter())
-                {
-                    self.emit(I::gen_move(*dst, tmp.to_reg(), *reg_ty));
-                }
-            }
-        }
-
-        // Now, finally, deal with the moves whose sources are constants.
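// ---------------------------------------------------------------------
// Illustrative sketch (editorial aside, not part of this patch): the
// overlap hazard the deleted `lower_edge` above guarded against. A
// parallel copy such as {a <- b, b <- a} cannot be done with in-place
// moves; whenever a destination is also a source, reading everything
// into temporaries first is always safe. Hypothetical, simplified.

use std::collections::HashSet;

/// Perform `regs[dsts[i]] = regs[srcs[i]]` for all i, as if in parallel.
fn parallel_move(regs: &mut [u64], dsts: &[usize], srcs: &[usize]) {
    let src_set: HashSet<usize> = srcs.iter().copied().collect();
    if dsts.iter().all(|d| !src_set.contains(d)) {
        // Disjoint sets: direct copies avoid the temporaries.
        for (&d, &s) in dsts.iter().zip(srcs) {
            regs[d] = regs[s];
        }
    } else {
        // Overlap: snapshot all sources first, then write.
        let tmps: Vec<u64> = srcs.iter().map(|&s| regs[s]).collect();
        for (&d, t) in dsts.iter().zip(tmps) {
            regs[d] = t;
        }
    }
}
// ---------------------------------------------------------------------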
- for (ty, dst_reg, const_val) in &const_bundles { - for inst in I::gen_constant(*dst_reg, *const_val as u128, *ty, |ty| { - self.alloc_tmp(ty).only_reg().unwrap() - }) - .into_iter() - { - self.emit(inst); - } - } - - Ok(()) } /// Has this instruction been sunk to a use-site (i.e., away from its @@ -694,21 +556,24 @@ impl<'func, I: VCodeInst> Lower<'func, I> { self.cur_scan_entry_color = Some(self.block_end_colors[block]); // Lowering loop: // - For each non-branch instruction, in reverse order: - // - If side-effecting (load, store, branch/call/return, possible trap), or if - // used outside of this block, or if demanded by another inst, then lower. + // - If side-effecting (load, store, branch/call/return, + // possible trap), or if used outside of this block, or if + // demanded by another inst, then lower. // - // That's it! Lowering of side-effecting ops will force all *needed* - // (live) non-side-effecting ops to be lowered at the right places, via - // the `use_input_reg()` callback on the `LowerCtx` (that's us). That's - // because `use_input_reg()` sets the eager/demand bit for any insts - // whose result registers are used. + // That's it! Lowering of side-effecting ops will force all + // *needed* (live) non-side-effecting ops to be lowered at the + // right places, via the `use_input_reg()` callback on the + // `LowerCtx` (that's us). That's because `use_input_reg()` + // sets the eager/demand bit for any insts whose result + // registers are used. // - // We build up the BB in reverse instruction order in `bb_insts`. - // Because the machine backend calls `ctx.emit()` in forward order, we - // collect per-IR-inst lowered instructions in `ir_insts`, then reverse - // these and append to `bb_insts` as we go backward through the block. - // `bb_insts` are then reversed again and appended to the VCode at the - // end of the BB (in the toplevel driver `lower()`). + // We set the VCodeBuilder to "backward" mode, so we emit + // blocks in reverse order wrt the BlockIndex sequence, and + // emit instructions in reverse order within blocks. Because + // the machine backend calls `ctx.emit()` in forward order, we + // collect per-IR-inst lowered instructions in `ir_insts`, + // then reverse these and append to the VCode at the end of + // each IR instruction. for inst in self.f.layout.block_insts(block).rev() { let data = &self.f.dfg[inst]; let has_side_effect = has_lowering_side_effect(self.f, inst); @@ -750,9 +615,6 @@ impl<'func, I: VCodeInst> Lower<'func, I> { if has_side_effect || value_needed { log::trace!("lowering: inst {}: {:?}", inst, self.f.dfg[inst]); backend.lower(self, inst)?; - // Emit value-label markers if needed, to later recover debug - // mappings. - self.emit_value_label_markers_for_inst(inst); } if data.opcode().is_return() { // Return: handle specially, using ABI-appropriate sequence. @@ -767,11 +629,33 @@ impl<'func, I: VCodeInst> Lower<'func, I> { let loc = self.srcloc(inst); self.finish_ir_inst(loc); + + // Emit value-label markers if needed, to later recover + // debug mappings. This must happen before the instruction + // (so after we emit, in bottom-to-top pass). + self.emit_value_label_markers_for_inst(inst); } + + // Add the block params to this block. 
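// ---------------------------------------------------------------------
// Illustrative sketch (editorial aside, not part of this patch): the
// "emit forward, store backward" scheme described in the comment above.
// The backend pushes each IR instruction's machine instructions in
// forward order into a small staging buffer; the buffer is drained in
// reverse into the backward-built VCode, which is reversed once at the
// end. Hypothetical, simplified.

#[derive(Default)]
struct BackwardBuf {
    ir_insts: Vec<&'static str>, // per-IR-inst staging, forward order
    vcode: Vec<&'static str>,    // whole function, reversed
}

impl BackwardBuf {
    fn emit(&mut self, inst: &'static str) {
        self.ir_insts.push(inst);
    }
    fn finish_ir_inst(&mut self) {
        for inst in self.ir_insts.drain(..).rev() {
            self.vcode.push(inst);
        }
    }
    fn build(mut self) -> Vec<&'static str> {
        self.vcode.reverse();
        self.vcode
    }
}

fn main() {
    let mut b = BackwardBuf::default();
    // IR insts are visited bottom-to-top; within each, we emit forward.
    b.emit("mov t0, #2");
    b.emit("add r0, t0");
    b.finish_ir_inst(); // last IR inst, lowered first
    b.emit("ld t0, [sp]");
    b.finish_ir_inst(); // first IR inst, lowered last
    assert_eq!(b.build(), vec!["ld t0, [sp]", "mov t0, #2", "add r0, t0"]);
}
// ---------------------------------------------------------------------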
+        self.add_block_params(block)?;
+
         self.cur_scan_entry_color = None;
         Ok(())
     }
 
+    fn add_block_params(&mut self, block: Block) -> CodegenResult<()> {
+        for &param in self.f.dfg.block_params(block) {
+            let ty = self.f.dfg.value_type(param);
+            let (_reg_rcs, reg_tys) = I::rc_for_type(ty)?;
+            debug_assert_eq!(reg_tys.len(), self.value_regs[param].len());
+            for (&reg, &rty) in self.value_regs[param].regs().iter().zip(reg_tys.iter()) {
+                self.vcode
+                    .add_block_param(reg.to_virtual_reg().unwrap(), rty);
+            }
+        }
+        Ok(())
+    }
+
     fn get_value_labels<'a>(&'a self, val: Value, depth: usize) -> Option<&'a [ValueLabelStart]> {
         if let Some(ref values_labels) = self.f.dfg.values_labels {
             log::trace!(
@@ -794,7 +678,6 @@ impl<'func, I: VCodeInst> Lower<'func, I> {
     }
 
     fn emit_value_label_marks_for_value(&mut self, val: Value) {
-        let mut markers: SmallVec<[I; 4]> = smallvec![];
         let regs = self.value_regs[val];
         if regs.len() > 1 {
             return;
@@ -813,12 +696,9 @@ impl<'func, I: VCodeInst> Lower<'func, I> {
                 reg,
                 label,
             );
-            markers.push(I::gen_value_label_marker(label, reg));
+            self.vcode.add_value_label(reg, label);
             }
         }
-        for marker in markers {
-            self.emit(marker);
-        }
     }
 
     fn emit_value_label_markers_for_inst(&mut self, inst: Inst) {
@@ -849,36 +729,17 @@ impl<'func, I: VCodeInst> Lower<'func, I> {
     }
 
     fn finish_ir_inst(&mut self, loc: SourceLoc) {
-        // `bb_insts` is kept in reverse order, so emit the instructions in
-        // reverse order.
-        for mut tuple in self.ir_insts.drain(..).rev() {
-            tuple.loc = loc;
-            self.bb_insts.push(tuple);
+        self.vcode.set_srcloc(loc);
+        // The VCodeBuilder builds in reverse order (and reverses at
+        // the end), but `ir_insts` is in forward order, so reverse
+        // it.
+        for inst in self.ir_insts.drain(..).rev() {
+            self.vcode.push(inst);
         }
     }
 
     fn finish_bb(&mut self) {
-        let start = self.block_insts.len();
-        for tuple in self.bb_insts.drain(..).rev() {
-            self.block_insts.push(tuple);
-        }
-        let end = self.block_insts.len();
-        self.block_ranges.push((start, end));
-    }
-
-    fn copy_bbs_to_vcode(&mut self) {
-        for &(start, end) in self.block_ranges.iter().rev() {
-            for &InstTuple {
-                loc,
-                is_safepoint,
-                ref inst,
-            } in &self.block_insts[start..end]
-            {
-                self.vcode.set_srcloc(loc);
-                self.vcode.push(inst.clone(), is_safepoint);
-            }
-            self.vcode.end_bb();
-        }
+        self.vcode.end_bb();
     }
 
     fn lower_clif_branches<B: LowerBackend<MInst = I>>(
@@ -900,9 +761,28 @@ impl<'func, I: VCodeInst> Lower<'func, I> {
         backend.lower_branch_group(self, branches, targets)?;
         let loc = self.srcloc(branches[0]);
         self.finish_ir_inst(loc);
+        // Add block param outputs for current block.
+        self.lower_branch_blockparam_args(block);
         Ok(())
     }
 
+    fn lower_branch_blockparam_args(&mut self, block: Block) {
+        visit_block_succs(self.f, block, |inst, _succ| {
+            let branch_args = self.f.dfg.inst_variable_args(inst);
+            let mut branch_arg_vregs: SmallVec<[Reg; 16]> = smallvec![];
+            for &arg in branch_args {
+                let arg = self.f.dfg.resolve_aliases(arg);
+                let regs = self.put_value_in_regs(arg);
+                for &vreg in regs.regs() {
+                    let vreg = self.vcode.resolve_vreg_alias(vreg.into());
+                    branch_arg_vregs.push(vreg.into());
                }
+            }
+            self.vcode.add_branch_args_for_succ(&branch_arg_vregs[..]);
+        });
+        self.finish_ir_inst(SourceLoc::default());
+    }
+
     fn collect_branches_and_targets(
         &self,
         bindex: BlockIndex,
@@ -927,10 +807,7 @@ impl<'func, I: VCodeInst> Lower<'func, I> {
     }
 
     /// Lower the function.
-    pub fn lower<B: LowerBackend<MInst = I>>(
-        mut self,
-        backend: &B,
-    ) -> CodegenResult<(VCode<I>, StackmapRequestInfo)> {
+    pub fn lower<B: LowerBackend<MInst = I>>(mut self, backend: &B) -> CodegenResult<VCode<I>> {
         log::trace!("about to lower function: {:?}", self.f);
 
         // Initialize the ABI object, giving it a temp if requested.
@@ -945,7 +822,7 @@ impl<'func, I: VCodeInst> Lower<'func, I> {
         // not the whole `Lower` impl).
         self.pinned_reg = backend.maybe_pinned_reg();
 
-        self.vcode.set_entry(0);
+        self.vcode.set_entry(BlockIndex::new(0));
 
         // Reused vectors for branch lowering.
         let mut branches: SmallVec<[Inst; 2]> = SmallVec::new();
@@ -963,7 +840,7 @@ impl<'func, I: VCodeInst> Lower<'func, I> {
 
         // Main lowering loop over lowered blocks.
         for (bindex, lb) in lowered_order.iter().enumerate().rev() {
-            let bindex = bindex as BlockIndex;
+            let bindex = BlockIndex::new(bindex);
 
             // Lower the block body in reverse order (see comment in
             // `lower_clif_block()` for rationale).
@@ -976,30 +853,41 @@ impl<'func, I: VCodeInst> Lower<'func, I> {
                     self.finish_ir_inst(self.srcloc(branches[0]));
                 }
             } else {
-                // If no orig block, this must be a pure edge block; get the successor and
-                // emit a jump.
+                // If no orig block, this must be a pure edge block;
+                // get the successor and emit a jump. Add block params
+                // according to the one successor, and pass them
+                // through; note that the successor must have an
+                // original block.
                 let (_, succ) = self.vcode.block_order().succ_indices(bindex)[0];
+
+                let orig_succ = lowered_order[succ.index()];
+                let orig_succ = orig_succ
+                    .orig_block()
+                    .expect("Edge block succ must be body block");
+
+                let mut branch_arg_vregs: SmallVec<[Reg; 16]> = smallvec![];
+                for ty in self.f.dfg.block_param_types(orig_succ) {
+                    let regs = alloc_vregs(ty, &mut self.next_vreg, &mut self.vcode)?;
+                    for &reg in regs.regs() {
+                        branch_arg_vregs.push(reg);
+                        let vreg = reg.to_virtual_reg().unwrap();
+                        self.vcode
+                            .add_block_param(vreg, self.vcode.get_vreg_type(vreg));
+                    }
+                }
+                self.vcode.add_branch_args_for_succ(&branch_arg_vregs[..]);
+
+                self.emit(I::gen_jump(MachLabel::from_block(succ)));
                 self.finish_ir_inst(SourceLoc::default());
             }
-            // Out-edge phi moves.
-            if let Some((pred, inst, succ)) = lb.out_edge() {
-                self.lower_edge(pred, inst, succ)?;
-                self.finish_ir_inst(SourceLoc::default());
-            }
             // Original block body.
             if let Some(bb) = lb.orig_block() {
                 self.lower_clif_block(backend, bb)?;
                 self.emit_value_label_markers_for_block_args(bb);
             }
-            // In-edge phi moves.
-            if let Some((pred, inst, succ)) = lb.in_edge() {
-                self.lower_edge(pred, inst, succ)?;
-                self.finish_ir_inst(SourceLoc::default());
-            }
-            if bindex == 0 {
+            if bindex.index() == 0 {
                 // Set up the function with arg vreg inits.
                 self.gen_arg_setup();
                 self.finish_ir_inst(SourceLoc::default());
@@ -1008,13 +896,12 @@ impl<'func, I: VCodeInst> Lower<'func, I> {
             self.finish_bb();
         }
 
-        self.copy_bbs_to_vcode();
-
-        // Now that we've emitted all instructions into the VCodeBuilder, let's build the VCode.
-        let (vcode, stack_map_info) = self.vcode.build();
+        // Now that we've emitted all instructions into the
+        // VCodeBuilder, let's build the VCode.
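// ---------------------------------------------------------------------
// Illustrative sketch (editorial aside, not part of this patch): why a
// pure edge block above can simply mint fresh vregs and pass them
// through. With blockparam-based SSA (instead of explicit phi moves),
// an edge block A->B receives as many values as B has parameters and
// forwards the same vregs as branch arguments, leaving all copy
// insertion to regalloc2. Hypothetical, simplified.

struct EdgeBlock {
    params: Vec<u32>,      // vregs this edge block receives
    branch_args: Vec<u32>, // vregs it passes on to the successor
}

fn make_edge_block(next_vreg: &mut u32, succ_param_count: usize) -> EdgeBlock {
    let params: Vec<u32> = (0..succ_param_count)
        .map(|_| {
            let v = *next_vreg;
            *next_vreg += 1;
            v
        })
        .collect();
    // Pass through unchanged: the edge block itself does no work.
    EdgeBlock {
        branch_args: params.clone(),
        params,
    }
}
// ---------------------------------------------------------------------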
+ let vcode = self.vcode.build(); log::trace!("built vcode: {:?}", vcode); - Ok((vcode, stack_map_info)) + Ok(vcode) } } @@ -1278,19 +1165,8 @@ impl<'func, I: VCodeInst> LowerCtx for Lower<'func, I> { } fn emit(&mut self, mach_inst: I) { - self.ir_insts.push(InstTuple { - loc: SourceLoc::default(), - is_safepoint: false, - inst: mach_inst, - }); - } - - fn emit_safepoint(&mut self, mach_inst: I) { - self.ir_insts.push(InstTuple { - loc: SourceLoc::default(), - is_safepoint: true, - inst: mach_inst, - }); + log::trace!("emit: {:?}", mach_inst); + self.ir_insts.push(mach_inst); } fn sink_inst(&mut self, ir_inst: Inst) { @@ -1336,7 +1212,7 @@ impl<'func, I: VCodeInst> LowerCtx for Lower<'func, I> { } fn ensure_in_vreg(&mut self, reg: Reg, ty: Type) -> Reg { - if reg.is_virtual() { + if reg.to_virtual_reg().is_some() { reg } else { let new_reg = self.alloc_tmp(ty).only_reg().unwrap(); @@ -1344,6 +1220,11 @@ impl<'func, I: VCodeInst> LowerCtx for Lower<'func, I> { new_reg.to_reg() } } + + fn set_vreg_alias(&mut self, from: Reg, to: Reg) { + log::trace!("set vreg alias: from {:?} to {:?}", from, to); + self.vcode.set_vreg_alias(from, to); + } } /// Visit all successors of a block with a given visitor closure. diff --git a/cranelift/codegen/src/machinst/mod.rs b/cranelift/codegen/src/machinst/mod.rs index c2cdb4e32c..7d1581d4f8 100644 --- a/cranelift/codegen/src/machinst/mod.rs +++ b/cranelift/codegen/src/machinst/mod.rs @@ -8,14 +8,10 @@ //! //! The container for machine instructions, at various stages of construction, //! is the `VCode` struct. We refer to a sequence of machine instructions organized -//! into basic blocks as "vcode". This is short for "virtual-register code", though -//! it's a bit of a misnomer because near the end of the pipeline, vcode has all -//! real registers. Nevertheless, the name is catchy and we like it. +//! into basic blocks as "vcode". This is short for "virtual-register code". //! //! The compilation pipeline, from an `ir::Function` (already optimized as much as //! you like by machine-independent optimization passes) onward, is as follows. -//! (N.B.: though we show the VCode separately at each stage, the passes -//! mutate the VCode in place; these are not separate copies of the code.) //! //! ```plain //! @@ -31,37 +27,25 @@ //! | with unknown offsets. //! | - critical edges (actually all edges) //! | are split.) -//! | [regalloc] //! | -//! VCode (machine instructions: -//! | - all real registers. -//! | - new instruction sequence returned -//! | out-of-band in RegAllocResult. -//! | - instruction sequence has spills, -//! | reloads, and moves inserted. -//! | - other invariants same as above.) +//! | [regalloc --> `regalloc2::Output`; VCode is unchanged] //! | -//! | [preamble/postamble] +//! | [binary emission via MachBuffer] //! | -//! VCode (machine instructions: -//! | - stack-frame size known. -//! | - out-of-band instruction sequence -//! | has preamble prepended to entry -//! | block, and postamble injected before -//! | every return instruction. -//! | - all symbolic stack references to -//! | stackslots and spillslots are resolved -//! | to concrete FP-offset mem addresses.) -//! | -//! | [binary emission via MachBuffer -//! | with streaming branch resolution/simplification] -//! | -//! Vec (machine code!) +//! Vec (machine code: +//! | - two-dest branches resolved via +//! | streaming branch resolution/simplification. +//! | - regalloc `Allocation` results used directly +//! | by instruction emission code. +//! 
| - prologue and epilogue(s) built and emitted +//! | directly during emission. +//! | - nominal-SP-relative offsets resolved +//! | by tracking EmitState.) //! //! ``` use crate::binemit::{Addend, CodeInfo, CodeOffset, Reloc, StackMap}; -use crate::ir::{SourceLoc, StackSlot, Type, ValueLabel}; +use crate::ir::{SourceLoc, StackSlot, Type}; use crate::result::CodegenResult; use crate::settings::Flags; use crate::value_label::ValueLabelsRanges; @@ -69,10 +53,7 @@ use alloc::boxed::Box; use alloc::vec::Vec; use core::fmt::Debug; use cranelift_entity::PrimaryMap; -use regalloc::RegUsageCollector; -use regalloc::{ - RealReg, RealRegUniverse, Reg, RegClass, RegUsageMapper, SpillSlot, VirtualReg, Writable, -}; +use regalloc2::{Allocation, VReg}; use smallvec::{smallvec, SmallVec}; use std::string::String; @@ -98,20 +79,15 @@ pub use helpers::*; pub mod inst_common; pub use inst_common::*; pub mod valueregs; +pub use reg::*; pub use valueregs::*; -pub mod debug; -pub use regmapping::*; -pub mod regmapping; +pub mod reg; /// A machine instruction. pub trait MachInst: Clone + Debug { /// Return the registers referenced by this machine instruction along with /// the modes of reference (use, def, modify). - fn get_regs(&self, collector: &mut RegUsageCollector); - - /// Map virtual registers to physical registers using the given virt->phys - /// maps corresponding to the program points prior to, and after, this instruction. - fn map_regs(&mut self, maps: &RUM); + fn get_operands VReg>(&self, collector: &mut OperandCollector<'_, F>); /// If this is a simple move, return the (source, destination) tuple of registers. fn is_move(&self) -> Option<(Writable, Reg)>; @@ -128,11 +104,6 @@ pub trait MachInst: Clone + Debug { true } - /// If this is a load or store to the stack, return that info. - fn stack_op_info(&self) -> Option { - None - } - /// Generate a move. fn gen_move(to_reg: Writable, from_reg: Reg, ty: Type) -> Self; @@ -144,10 +115,9 @@ pub trait MachInst: Clone + Debug { alloc_tmp: F, ) -> SmallVec<[Self; 4]>; - /// Possibly operate on a value directly in a spill-slot rather than a - /// register. Useful if the machine has register-memory instruction forms - /// (e.g., add directly from or directly to memory), like x86. - fn maybe_direct_reload(&self, reg: VirtualReg, slot: SpillSlot) -> Option; + /// Generate a dummy instruction that will keep a value alive but + /// has no other purpose. + fn gen_dummy_use(reg: Reg) -> Self; /// Determine register class(es) to store the given Cranelift type, and the /// Cranelift type actually stored in the underlying register(s). May return @@ -163,6 +133,13 @@ pub trait MachInst: Clone + Debug { /// generating spills and reloads for individual registers. fn rc_for_type(ty: Type) -> CodegenResult<(&'static [RegClass], &'static [Type])>; + /// Get an appropriate type that can fully hold a value in a given + /// register class. This may not be the only type that maps to + /// that class, but when used with `gen_move()` or the ABI trait's + /// load/spill constructors, it should produce instruction(s) that + /// move the entire register contents. + fn canonical_type_for_rc(rc: RegClass) -> Type; + /// Generate a jump to another target. Used during lowering of /// control flow. fn gen_jump(target: MachLabel) -> Self; @@ -187,16 +164,8 @@ pub trait MachInst: Clone + Debug { /// be dependent on compilation flags. fn ref_type_regclass(_flags: &Flags) -> RegClass; - /// Does this instruction define a ValueLabel? 
Returns the `Reg` whose value - /// becomes the new value of the `ValueLabel` after this instruction. - fn defines_value_label(&self) -> Option<(ValueLabel, Reg)> { - None - } - - /// Create a marker instruction that defines a value label. - fn gen_value_label_marker(_label: ValueLabel, _reg: Reg) -> Self { - Self::gen_nop(0) - } + /// Is this a safepoint? + fn is_safepoint(&self) -> bool; /// A label-use kind: a type that describes the types of label references that /// can occur in an instruction. @@ -266,35 +235,6 @@ pub enum MachTerminator<'a> { Indirect(&'a [MachLabel]), } -impl<'a> MachTerminator<'a> { - /// Get the successor labels named in a `MachTerminator`. - pub fn get_succs(&self) -> SmallVec<[MachLabel; 2]> { - let mut ret = smallvec![]; - match self { - &MachTerminator::Uncond(l) => { - ret.push(l); - } - &MachTerminator::Cond(l1, l2) => { - ret.push(l1); - ret.push(l2); - } - &MachTerminator::Indirect(ls) => { - ret.extend(ls.iter().cloned()); - } - _ => {} - } - ret - } - - /// Is this a terminator? - pub fn is_term(&self) -> bool { - match self { - MachTerminator::None => false, - _ => true, - } - } -} - /// A trait describing the ability to encode a MachInst into binary machine code. pub trait MachInstEmit: MachInst { /// Persistent state carried across `emit` invocations. @@ -302,9 +242,15 @@ pub trait MachInstEmit: MachInst { /// Constant information used in `emit` invocations. type Info; /// Emit the instruction. - fn emit(&self, code: &mut MachBuffer, info: &Self::Info, state: &mut Self::State); + fn emit( + &self, + allocs: &[Allocation], + code: &mut MachBuffer, + info: &Self::Info, + state: &mut Self::State, + ); /// Pretty-print the instruction. - fn pretty_print(&self, mb_rru: Option<&RealRegUniverse>, state: &mut Self::State) -> String; + fn pretty_print_inst(&self, allocs: &[Allocation], state: &mut Self::State) -> String; } /// A trait describing the emission state carried between MachInsts when @@ -409,15 +355,3 @@ pub enum UnwindInfoKind { #[cfg(feature = "unwind")] Windows, } - -/// Info about an operation that loads or stores from/to the stack. -#[derive(Clone, Copy, Debug)] -pub enum MachInstStackOpInfo { - /// Load from an offset from the nominal stack pointer into the given reg. - LoadNomSPOff(Reg, i64), - /// Store to an offset from the nominal stack pointer from the given reg. - StoreNomSPOff(Reg, i64), - /// Adjustment of nominal-SP up or down. This value is added to subsequent - /// offsets in loads/stores above to produce real-SP offsets. - NomSPAdj(i64), -} diff --git a/cranelift/codegen/src/machinst/reg.rs b/cranelift/codegen/src/machinst/reg.rs new file mode 100644 index 0000000000..8b0b835aac --- /dev/null +++ b/cranelift/codegen/src/machinst/reg.rs @@ -0,0 +1,504 @@ +//! Definitions for registers, operands, etc. Provides a thin +//! interface over the register allocator so that we can more easily +//! swap it out or shim it when necessary. + +use crate::machinst::MachInst; +use alloc::{string::String, vec::Vec}; +use core::{fmt::Debug, hash::Hash}; +use regalloc2::{Allocation, Operand, PReg, VReg}; +use smallvec::{smallvec, SmallVec}; + +#[cfg(feature = "enable-serde")] +use serde::{Deserialize, Serialize}; + +/// The first 128 vregs (64 int, 64 float/vec) are "pinned" to +/// physical registers: this means that they are always constrained to +/// the corresponding register at all use/mod/def sites. 
+///
+/// Arbitrary vregs can also be constrained to physical registers at
+/// particular use/def/mod sites, and this is preferable; but pinned
+/// vregs allow us to migrate code that has been written using
+/// RealRegs directly.
+const PINNED_VREGS: usize = 128;
+
+/// Convert a `VReg` to its pinned `PReg`, if any.
+pub fn pinned_vreg_to_preg(vreg: VReg) -> Option<PReg> {
+    if vreg.vreg() < PINNED_VREGS {
+        Some(PReg::from_index(vreg.vreg()))
+    } else {
+        None
+    }
+}
+
+/// Return the index of the first vreg available for generated code
+/// (i.e., after all pinned vregs).
+pub fn first_user_vreg_index() -> usize {
+    // This is just the constant defined above, but we keep the
+    // constant private and expose only this helper function with the
+    // specific name in order to ensure other parts of the code don't
+    // open-code and depend on the index-space scheme.
+    PINNED_VREGS
+}
+
+/// A register named in an instruction. This register can be either a
+/// virtual register or a fixed physical register. It does not have
+/// any constraints applied to it: those can be added later in
+/// `MachInst::get_operands()` when the `Reg`s are converted to
+/// `Operand`s.
+#[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
+#[cfg_attr(feature = "enable-serde", derive(Serialize, Deserialize))]
+pub struct Reg(VReg);
+
+impl Reg {
+    /// Get the physical register (`RealReg`), if this register is
+    /// one.
+    pub fn to_real_reg(self) -> Option<RealReg> {
+        if pinned_vreg_to_preg(self.0).is_some() {
+            Some(RealReg(self.0))
+        } else {
+            None
+        }
+    }
+
+    /// Get the virtual (non-physical) register, if this register is
+    /// one.
+    pub fn to_virtual_reg(self) -> Option<VirtualReg> {
+        if pinned_vreg_to_preg(self.0).is_none() {
+            Some(VirtualReg(self.0))
+        } else {
+            None
+        }
+    }
+
+    /// Get the class of this register.
+    pub fn class(self) -> RegClass {
+        self.0.class()
+    }
+
+    /// Is this a real (physical) reg?
+    pub fn is_real(self) -> bool {
+        self.to_real_reg().is_some()
+    }
+
+    /// Is this a virtual reg?
+    pub fn is_virtual(self) -> bool {
+        self.to_virtual_reg().is_some()
+    }
+}
+
+impl std::fmt::Debug for Reg {
+    fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
+        if let Some(rreg) = self.to_real_reg() {
+            let preg: PReg = rreg.into();
+            write!(f, "{}", preg)
+        } else if let Some(vreg) = self.to_virtual_reg() {
+            let vreg: VReg = vreg.into();
+            write!(f, "{}", vreg)
+        } else {
+            unreachable!()
+        }
+    }
+}
+
+/// A real (physical) register. This corresponds to one of the target
+/// ISA's named registers and can be used as an instruction operand.
+#[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
+#[cfg_attr(feature = "enable-serde", derive(Serialize, Deserialize))]
+pub struct RealReg(VReg);
+
+impl RealReg {
+    /// Get the class of this register.
+    pub fn class(self) -> RegClass {
+        self.0.class()
+    }
+
+    pub fn hw_enc(self) -> u8 {
+        PReg::from(self).hw_enc() as u8
+    }
+}
+
+impl std::fmt::Debug for RealReg {
+    fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
+        Reg::from(*self).fmt(f)
+    }
+}
+
+/// A virtual register. This can be allocated into a real (physical)
+/// register of the appropriate register class, but which one is not
+/// specified. Virtual registers are used when generating `MachInst`s,
+/// before register allocation occurs, in order to allow us to name as
+/// many register-carried values as necessary.
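// ---------------------------------------------------------------------
// Illustrative sketch (editorial aside, not part of this patch): the
// index-space scheme described above. Vreg indices below PINNED_VREGS
// name physical registers directly; indices at or above it are true
// virtual registers. Hypothetical, simplified (the real code uses
// regalloc2's VReg/PReg types rather than bare indices).

const PINNED: usize = 128;

fn preg_index(vreg_index: usize) -> Option<usize> {
    if vreg_index < PINNED {
        Some(vreg_index) // pinned: the vreg index *is* the preg index
    } else {
        None
    }
}

fn main() {
    assert_eq!(preg_index(3), Some(3)); // a pinned (physical) vreg
    assert_eq!(preg_index(128), None); // the first user vreg
}
// ---------------------------------------------------------------------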
+#[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)] +#[cfg_attr(feature = "enable-serde", derive(Serialize, Deserialize))] +pub struct VirtualReg(VReg); + +impl VirtualReg { + /// Get the class of this register. + pub fn class(self) -> RegClass { + self.0.class() + } + + pub fn index(self) -> usize { + self.0.vreg() + } +} + +impl std::fmt::Debug for VirtualReg { + fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { + Reg::from(*self).fmt(f) + } +} + +/// A type wrapper that indicates a register type is writable. The +/// underlying register can be extracted, and the type wrapper can be +/// built using an arbitrary register. Hence, this type-level wrapper +/// is not strictly a guarantee. However, "casting" to a writable +/// register is an explicit operation for which we can +/// audit. Ordinarily, internal APIs in the compiler backend should +/// take a `Writable` whenever the register is written, and the +/// usual, frictionless way to get one of these is to allocate a new +/// temporary. +#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)] +#[cfg_attr(feature = "enable-serde", derive(Serialize, Deserialize))] +pub struct Writable { + reg: T, +} + +impl Writable { + /// Explicitly construct a `Writable` from a `T`. As noted in + /// the documentation for `Writable`, this is not hidden or + /// disallowed from the outside; anyone can perform the "cast"; + /// but it is explicit so that we can audit the use sites. + pub fn from_reg(reg: T) -> Writable { + Writable { reg } + } + + /// Get the underlying register, which can be read. + pub fn to_reg(self) -> T { + self.reg + } + + /// Map the underlying register to another value or type. + pub fn map(self, f: F) -> Writable + where + U: Clone + Copy + Debug + PartialEq + Eq + PartialOrd + Ord + Hash, + F: Fn(T) -> U, + { + Writable { reg: f(self.reg) } + } +} + +// Conversions between regalloc2 types (VReg) and our types +// (VirtualReg, RealReg, Reg). + +impl std::convert::From for Reg { + fn from(vreg: regalloc2::VReg) -> Reg { + Reg(vreg) + } +} + +impl std::convert::From for VirtualReg { + fn from(vreg: regalloc2::VReg) -> VirtualReg { + debug_assert!(pinned_vreg_to_preg(vreg).is_none()); + VirtualReg(vreg) + } +} + +impl std::convert::From for RealReg { + fn from(vreg: regalloc2::VReg) -> RealReg { + debug_assert!(pinned_vreg_to_preg(vreg).is_some()); + RealReg(vreg) + } +} + +impl std::convert::From for regalloc2::VReg { + /// Extract the underlying `regalloc2::VReg`. Note that physical + /// registers also map to particular (special) VRegs, so this + /// method can be used either on virtual or physical `Reg`s. + fn from(reg: Reg) -> regalloc2::VReg { + reg.0 + } +} + +impl std::convert::From for regalloc2::VReg { + fn from(reg: VirtualReg) -> regalloc2::VReg { + reg.0 + } +} + +impl std::convert::From for regalloc2::VReg { + fn from(reg: RealReg) -> regalloc2::VReg { + reg.0 + } +} + +impl std::convert::From for regalloc2::PReg { + fn from(reg: RealReg) -> regalloc2::PReg { + PReg::from_index(reg.0.vreg()) + } +} + +impl std::convert::From for RealReg { + fn from(preg: regalloc2::PReg) -> RealReg { + RealReg(VReg::new(preg.index(), preg.class())) + } +} + +impl std::convert::From for Reg { + fn from(preg: regalloc2::PReg) -> Reg { + Reg(VReg::new(preg.index(), preg.class())) + } +} + +impl std::convert::From for Reg { + fn from(reg: RealReg) -> Reg { + Reg(reg.0) + } +} + +impl std::convert::From for Reg { + fn from(reg: VirtualReg) -> Reg { + Reg(reg.0) + } +} + +/// A spill slot. 
+pub type SpillSlot = regalloc2::SpillSlot;
+
+/// A register class. Each register in the ISA has one class, and the
+/// classes are disjoint. Most modern ISAs will have just two classes:
+/// the integer/general-purpose registers (GPRs), and the float/vector
+/// registers (typically used for both).
+///
+/// Note that unlike some other compiler backend/register allocator
+/// designs, we do not allow for overlapping classes, i.e. registers
+/// that belong to more than one class, because doing so makes the
+/// allocation problem significantly more complex. Instead, when a
+/// register can be addressed under different names for different
+/// sizes (for example), the backend author should pick classes that
+/// denote some fundamental allocation unit that encompasses the whole
+/// register. For example, always allocate 128-bit vector registers
+/// `v0`..`vN`, even though `f32` and `f64` values may use only the
+/// low 32/64 bits of those registers and name them differently.
+pub type RegClass = regalloc2::RegClass;
+
+/// An OperandCollector is a wrapper around a Vec of Operands
+/// (flattened array for a whole sequence of instructions) that
+/// gathers operands from a single instruction and provides the range
+/// in the flattened array.
+#[derive(Debug)]
+pub struct OperandCollector<'a, F: Fn(VReg) -> VReg> {
+    operands: &'a mut Vec<Operand>,
+    operands_start: usize,
+    clobbers: Vec<PReg>,
+    renamer: F,
+}
+
+impl<'a, F: Fn(VReg) -> VReg> OperandCollector<'a, F> {
+    /// Start gathering operands into one flattened operand array.
+    pub fn new(operands: &'a mut Vec<Operand>, renamer: F) -> Self {
+        let operands_start = operands.len();
+        Self {
+            operands,
+            operands_start,
+            clobbers: vec![],
+            renamer,
+        }
+    }
+
+    /// Add an operand.
+    fn add_operand(&mut self, operand: Operand) {
+        let vreg = (self.renamer)(operand.vreg());
+        let operand = Operand::new(vreg, operand.constraint(), operand.kind(), operand.pos());
+        self.operands.push(operand);
+    }
+
+    /// Add a clobber.
+    fn add_clobber(&mut self, clobber: PReg) {
+        self.clobbers.push(clobber);
+    }
+
+    /// Finish the operand collection and return the tuple giving the
+    /// range of indices in the flattened operand array, and the
+    /// clobber array.
+    pub fn finish(self) -> ((u32, u32), Vec<PReg>) {
+        let start = self.operands_start as u32;
+        let end = self.operands.len() as u32;
+        ((start, end), self.clobbers)
+    }
+
+    /// Add a register use, at the start of the instruction (`Before`
+    /// position).
+    pub fn reg_use(&mut self, reg: Reg) {
+        self.add_operand(Operand::reg_use(reg.into()));
+    }
+
+    /// Add multiple register uses.
+    pub fn reg_uses(&mut self, regs: &[Reg]) {
+        for &reg in regs {
+            self.reg_use(reg);
+        }
+    }
+
+    /// Add a register def, at the end of the instruction (`After`
+    /// position). Use only when this def will be written after all
+    /// uses are read.
+    pub fn reg_def(&mut self, reg: Writable<Reg>) {
+        self.add_operand(Operand::reg_def(reg.to_reg().into()));
+    }
+
+    /// Add multiple register defs.
+    pub fn reg_defs(&mut self, regs: &[Writable<Reg>]) {
+        for &reg in regs {
+            self.reg_def(reg);
+        }
+    }
+
+    /// Add a register "early def", which logically occurs at the
+    /// beginning of the instruction, alongside all uses. Use this
+    /// when the def may be written before all uses are read; the
+    /// regalloc will ensure that it does not overwrite any uses.
+ pub fn reg_early_def(&mut self, reg: Writable) { + self.add_operand(Operand::reg_def_at_start(reg.to_reg().into())); + } + + /// Add a register "fixed use", which ties a vreg to a particular + /// RealReg at this point. + pub fn reg_fixed_use(&mut self, reg: Reg, rreg: Reg) { + let rreg = rreg.to_real_reg().expect("fixed reg is not a RealReg"); + self.add_operand(Operand::reg_fixed_use(reg.into(), rreg.into())); + } + + /// Add a register "fixed def", which ties a vreg to a particular + /// RealReg at this point. + pub fn reg_fixed_def(&mut self, reg: Writable, rreg: Reg) { + let rreg = rreg.to_real_reg().expect("fixed reg is not a RealReg"); + self.add_operand(Operand::reg_fixed_def(reg.to_reg().into(), rreg.into())); + } + + /// Add a register def that reuses an earlier use-operand's + /// allocation. The index of that earlier operand (relative to the + /// current instruction's start of operands) must be known. + pub fn reg_reuse_def(&mut self, reg: Writable, idx: usize) { + if reg.to_reg().to_virtual_reg().is_some() { + self.add_operand(Operand::reg_reuse_def(reg.to_reg().into(), idx)); + } else { + // Sometimes destination registers that reuse a source are + // given with RealReg args. In this case, we assume the + // creator of the instruction knows what they are doing + // and just emit a normal def to the pinned vreg. + self.add_operand(Operand::reg_def(reg.to_reg().into())); + } + } + + /// Add a register use+def, or "modify", where the reg must stay + /// in the same register on the input and output side of the + /// instruction. + pub fn reg_mod(&mut self, reg: Writable) { + self.add_operand(Operand::new( + reg.to_reg().into(), + regalloc2::OperandConstraint::Reg, + regalloc2::OperandKind::Mod, + regalloc2::OperandPos::Early, + )); + } + + /// Add a register clobber. This is a register that is written by + /// the instruction, so must be reserved (not used) for the whole + /// instruction, but is not used afterward. + #[allow(dead_code)] // FIXME: use clobbers rather than defs for calls! + pub fn reg_clobber(&mut self, reg: Writable) { + self.add_clobber(PReg::from(reg.to_reg())); + } +} + +/// Use an OperandCollector to count the number of operands on an instruction. +pub fn count_operands(inst: &I) -> usize { + let mut ops = vec![]; + let mut coll = OperandCollector::new(&mut ops, |vreg| vreg); + inst.get_operands(&mut coll); + let ((start, end), _) = coll.finish(); + debug_assert_eq!(0, start); + end as usize +} + +/// Pretty-print part of a disassembly, with knowledge of +/// operand/instruction size, and optionally with regalloc +/// results. This can be used, for example, to print either `rax` or +/// `eax` for the register by those names on x86-64, depending on a +/// 64- or 32-bit context. +pub trait PrettyPrint { + fn pretty_print(&self, size_bytes: u8, allocs: &mut AllocationConsumer<'_>) -> String; + + fn pretty_print_default(&self) -> String { + self.pretty_print(0, &mut AllocationConsumer::new(&[])) + } +} + +/// A consumer of an (optional) list of Allocations along with Regs +/// that provides RealRegs where available. +/// +/// This is meant to be used during code emission or +/// pretty-printing. In at least the latter case, regalloc results may +/// or may not be available, so we may end up printing either vregs or +/// rregs. Even pre-regalloc, though, some registers may be RealRegs +/// that were provided when the instruction was created. 
+/// +/// This struct should be used in a specific way: when matching on an +/// instruction, provide it the Regs in the same order as they were +/// provided to the OperandCollector. +#[derive(Clone)] +pub struct AllocationConsumer<'a> { + allocs: std::slice::Iter<'a, Allocation>, +} + +impl<'a> AllocationConsumer<'a> { + pub fn new(allocs: &'a [Allocation]) -> Self { + Self { + allocs: allocs.iter(), + } + } + + pub fn next(&mut self, pre_regalloc_reg: Reg) -> Reg { + let alloc = self.allocs.next(); + let alloc = alloc.map(|alloc| { + Reg::from( + alloc + .as_reg() + .expect("Should not have gotten a stack allocation"), + ) + }); + + match (pre_regalloc_reg.to_real_reg(), alloc) { + (Some(rreg), None) => rreg.into(), + (Some(rreg), Some(alloc)) => { + debug_assert_eq!(Reg::from(rreg), alloc); + alloc + } + (None, Some(alloc)) => alloc, + _ => pre_regalloc_reg, + } + } + + pub fn next_writable(&mut self, pre_regalloc_reg: Writable) -> Writable { + Writable::from_reg(self.next(pre_regalloc_reg.to_reg())) + } + + pub fn next_n(&mut self, count: usize) -> SmallVec<[Allocation; 4]> { + let mut allocs = smallvec![]; + for _ in 0..count { + if let Some(next) = self.allocs.next() { + allocs.push(*next); + } else { + return allocs; + } + } + allocs + } +} + +impl<'a> std::default::Default for AllocationConsumer<'a> { + fn default() -> Self { + Self { allocs: [].iter() } + } +} diff --git a/cranelift/codegen/src/machinst/regmapping.rs b/cranelift/codegen/src/machinst/regmapping.rs deleted file mode 100644 index 4b51c426bd..0000000000 --- a/cranelift/codegen/src/machinst/regmapping.rs +++ /dev/null @@ -1,108 +0,0 @@ -use crate::ir::Type; -use regalloc::{Reg, RegUsageMapper, Writable}; -use smallvec::SmallVec; -use std::cell::Cell; - -// Define our own register-mapping trait so we can do arbitrary register -// renaming that are more free form than what `regalloc` constrains us to with -// its `RegUsageMapper` trait definition. -pub trait RegMapper { - fn get_use(&self, reg: Reg) -> Option; - fn get_def(&self, reg: Reg) -> Option; - fn get_mod(&self, reg: Reg) -> Option; - - fn map_use(&self, r: &mut Reg) { - if let Some(new) = self.get_use(*r) { - *r = new; - } - } - - fn map_def(&self, r: &mut Writable) { - if let Some(new) = self.get_def(r.to_reg()) { - *r = Writable::from_reg(new); - } - } - - fn map_mod(&self, r: &mut Writable) { - if let Some(new) = self.get_mod(r.to_reg()) { - *r = Writable::from_reg(new); - } - } -} - -impl RegMapper for T -where - T: RegUsageMapper, -{ - fn get_use(&self, reg: Reg) -> Option { - let v = reg.as_virtual_reg()?; - self.get_use(v).map(|r| r.to_reg()) - } - - fn get_def(&self, reg: Reg) -> Option { - let v = reg.as_virtual_reg()?; - self.get_def(v).map(|r| r.to_reg()) - } - - fn get_mod(&self, reg: Reg) -> Option { - let v = reg.as_virtual_reg()?; - self.get_mod(v).map(|r| r.to_reg()) - } -} - -#[derive(Debug, Default)] -pub struct RegRenamer { - // Map of `(old, new, used, ty)` register names. Use a `SmallVec` because - // we typically only have one or two renamings. - // - // The `used` flag indicates whether the mapping has been used for - // `get_def`, later used afterwards during `unmapped_defs` to know what - // moves need to be generated. - renames: SmallVec<[(Reg, Reg, Cell, Type); 2]>, -} - -impl RegRenamer { - /// Adds a new mapping which means that `old` reg should now be called - /// `new`. The type of `old` is `ty` as specified. 
- pub fn add_rename(&mut self, old: Reg, new: Reg, ty: Type) { - self.renames.push((old, new, Cell::new(false), ty)); - } - - fn get_rename(&self, reg: Reg, set_used_def: bool) -> Option { - let (_, new, used_def, _) = self.renames.iter().find(|(old, _, _, _)| reg == *old)?; - used_def.set(used_def.get() || set_used_def); - Some(*new) - } - - /// Returns the list of register mappings, with their type, which were not - /// actually mapped. - /// - /// This list is used because it means that the `old` name for the register - /// was never actually defined, so to correctly rename this register the - /// caller needs to move `old` into `new`. - /// - /// This yields tuples of `(old, new, ty)`. - pub fn unmapped_defs(&self) -> impl Iterator + '_ { - self.renames.iter().filter_map(|(old, new, used_def, ty)| { - if used_def.get() { - None - } else { - Some((*old, *new, *ty)) - } - }) - } -} - -impl RegMapper for RegRenamer { - fn get_use(&self, reg: Reg) -> Option { - self.get_rename(reg, false) - } - - fn get_def(&self, reg: Reg) -> Option { - self.get_rename(reg, true) - } - - fn get_mod(&self, reg: Reg) -> Option { - self.get_rename(reg, false) - } -} diff --git a/cranelift/codegen/src/machinst/valueregs.rs b/cranelift/codegen/src/machinst/valueregs.rs index df18202ccc..52b3c89896 100644 --- a/cranelift/codegen/src/machinst/valueregs.rs +++ b/cranelift/codegen/src/machinst/valueregs.rs @@ -1,7 +1,9 @@ //! Data structure for tracking the (possibly multiple) registers that hold one //! SSA `Value`. -use regalloc::{RealReg, Reg, VirtualReg, Writable}; +use regalloc2::{PReg, VReg}; + +use super::{RealReg, Reg, VirtualReg, Writable}; use std::fmt::Debug; const VALUE_REGS_PARTS: usize = 2; @@ -35,17 +37,17 @@ pub trait InvalidSentinel: Copy + Eq { } impl InvalidSentinel for Reg { fn invalid_sentinel() -> Self { - Reg::invalid() + Reg::from(VReg::invalid()) } } impl InvalidSentinel for VirtualReg { fn invalid_sentinel() -> Self { - VirtualReg::invalid() + VirtualReg::from(VReg::invalid()) } } impl InvalidSentinel for RealReg { fn invalid_sentinel() -> Self { - RealReg::invalid() + RealReg::from(PReg::invalid()) } } impl InvalidSentinel for Writable { diff --git a/cranelift/codegen/src/machinst/vcode.rs b/cranelift/codegen/src/machinst/vcode.rs index d1e585c609..fde128088c 100644 --- a/cranelift/codegen/src/machinst/vcode.rs +++ b/cranelift/codegen/src/machinst/vcode.rs @@ -18,46 +18,46 @@ //! backend pipeline. use crate::fx::FxHashMap; -use crate::ir::{self, types, Constant, ConstantData, SourceLoc}; +use crate::fx::FxHashSet; +use crate::ir::{self, types, Constant, ConstantData, LabelValueLoc, SourceLoc, ValueLabel}; use crate::machinst::*; -use crate::settings; use crate::timing; -use regalloc::Function as RegallocFunction; -use regalloc::Set as RegallocSet; -use regalloc::{ - BlockIx, InstIx, PrettyPrint, Range, RegAllocResult, RegClass, RegUsageCollector, - RegUsageMapper, SpillSlot, StackmapRequestInfo, +use crate::ValueLocRange; +use regalloc2::{ + Edit, Function as RegallocFunction, InstOrEdit, InstRange, Operand, OperandKind, PReg, + RegClass, VReg, }; use alloc::boxed::Box; -use alloc::{borrow::Cow, vec::Vec}; +use alloc::vec::Vec; use cranelift_entity::{entity_impl, Keys, PrimaryMap}; -use std::cell::RefCell; use std::collections::HashMap; use std::fmt; -use std::iter; -use std::string::String; /// Index referring to an instruction in VCode. -pub type InsnIndex = u32; +pub type InsnIndex = regalloc2::Inst; + /// Index referring to a basic block in VCode. 
-pub type BlockIndex = u32; +pub type BlockIndex = regalloc2::Block; /// VCodeInst wraps all requirements for a MachInst to be in VCode: it must be /// a `MachInst` and it must be able to emit itself at least to a `SizeCodeSink`. pub trait VCodeInst: MachInst + MachInstEmit {} impl VCodeInst for I {} -/// A function in "VCode" (virtualized-register code) form, after lowering. -/// This is essentially a standard CFG of basic blocks, where each basic block -/// consists of lowered instructions produced by the machine-specific backend. +/// A function in "VCode" (virtualized-register code) form, after +/// lowering. This is essentially a standard CFG of basic blocks, +/// where each basic block consists of lowered instructions produced +/// by the machine-specific backend. +/// +/// Note that the VCode is immutable once produced, and is not +/// modified by register allocation in particular. Rather, register +/// allocation on the `VCode` produces a separate `regalloc2::Output` +/// struct, and this can be passed to `emit`. `emit` in turn does not +/// modify the vcode, but produces an `EmitResult`, which contains the +/// machine code itself, and the associated disassembly and/or +/// metadata as requested. pub struct VCode { - /// Function liveins. - liveins: RegallocSet, - - /// Function liveouts. - liveouts: RegallocSet, - /// VReg IR-level types. vreg_types: Vec, @@ -67,6 +67,27 @@ pub struct VCode { /// Lowered machine instructions in order corresponding to the original IR. insts: Vec, + /// Operands: pre-regalloc references to virtual registers with + /// constraints, in one flattened array. This allows the regalloc + /// to efficiently access all operands without requiring expensive + /// matches or method invocations on insts. + operands: Vec, + + /// Operand index ranges: for each instruction in `insts`, there + /// is a tuple here providing the range in `operands` for that + /// instruction's operands. + operand_ranges: Vec<(u32, u32)>, + + /// Clobbers: a sparse map from instruction indices to clobber lists. + clobber_ranges: FxHashMap, + + /// A flat list of clobbered registers, with index ranges held by + /// `clobber_ranges`. + clobbers: Vec, + + /// Move information: for a given InsnIndex, (src, dst) operand pair. + is_move: FxHashMap, + /// Source locations for each instruction. (`SourceLoc` is a `u32`, so it is /// reasonable to keep one of these per instruction.) srclocs: Vec, @@ -77,13 +98,63 @@ pub struct VCode { /// Block instruction indices. block_ranges: Vec<(InsnIndex, InsnIndex)>, - /// Block successors: index range in the successor-list below. - block_succ_range: Vec<(usize, usize)>, + /// Block successors: index range in the `block_succs_preds` list. + block_succ_range: Vec<(u32, u32)>, - /// Block successor lists, concatenated into one Vec. The `block_succ_range` - /// list of tuples above gives (start, end) ranges within this list that - /// correspond to each basic block's successors. - block_succs: Vec, + /// Block predecessors: index range in the `block_succs_preds` list. + block_pred_range: Vec<(u32, u32)>, + + /// Block successor and predecessor lists, concatenated into one + /// Vec. The `block_succ_range` and `block_pred_range` lists of + /// tuples above give (start, end) ranges within this list that + /// correspond to each basic block's successors or predecessors, + /// respectively. + block_succs_preds: Vec, + + /// Block parameters: index range in `block_params` below. 
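(An editorial aside, not part of the patch; the field list continues below. The immutability described in the struct doc above implies roughly the following driver shape. This is a sketch only: `regalloc2::run` and the `MachineEnv` plumbing are approximate, and the exact entry point and options differ across regalloc2 versions.)

fn compile_vcode<I: VCodeInst>(
    vcode: VCode<I>,
    machine_env: &regalloc2::MachineEnv,
) -> EmitResult<I> {
    // VCode implements regalloc2::Function, so it can be handed to the
    // allocator as-is; the returned Output lives beside the VCode rather
    // than being spliced into it.
    let output = regalloc2::run(&vcode, machine_env).expect("regalloc");
    // Emission consumes the VCode and pairs it with the regalloc output.
    vcode.emit(&output, /* want_disasm = */ false, /* want_metadata = */ true)
}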
+ block_params_range: Vec<(u32, u32)>, + + /// Block parameter lists, concatenated into one vec. The + /// `block_params_range` list of tuples above gives (start, end) + /// ranges within this list that correspond to each basic block's + /// blockparam vregs. + block_params: Vec, + + /// Outgoing block arguments on branch instructions, concatenated + /// into one list. + /// + /// Note that this is conceptually a 3D array: we have a VReg list + /// per block, per successor. We flatten those three dimensions + /// into this 1D vec, then store index ranges in two levels of + /// indirection. + /// + /// Indexed by the indices in `branch_block_arg_succ_range`. + branch_block_args: Vec, + + /// Array of sequences of (start, end) tuples in + /// `branch_block_args`, one for each successor; these sequences + /// for each block are concatenated. + /// + /// Indexed by the indices in `branch_block_arg_succ_range`. + branch_block_arg_range: Vec<(u32, u32)>, + + /// For a given block, indices in `branch_block_arg_range` + /// corresponding to all of its successors. + branch_block_arg_succ_range: Vec<(u32, u32)>, + + /// VReg aliases. Each key in this table is translated to its + /// value when gathering Operands from instructions. Aliases are + /// not chased transitively (we do not further look up the + /// translated reg to see if it is another alias). + /// + /// We use these aliases to rename an instruction's expected + /// result vregs to the returned vregs from lowering, which are + /// usually freshly-allocated temps. + /// + /// Operands and branch arguments will already have been + /// translated through this alias table; but it helps to make + /// sense of instructions when pretty-printed, for example. + vreg_aliases: FxHashMap, /// Block-order information. block_order: BlockLoweringOrder, @@ -95,68 +166,113 @@ pub struct VCode { /// immutable across function compilations within the same module. emit_info: I::Info, - /// Safepoint instruction indices. Filled in post-regalloc. (Prior to - /// regalloc, the safepoint instructions are listed in the separate - /// `StackmapRequestInfo` held separate from the `VCode`.) - safepoint_insns: Vec, + /// Reference-typed `regalloc2::VReg`s. The regalloc requires + /// these in a dense slice (as opposed to querying the + /// reftype-status of each vreg) for efficient iteration. + reftyped_vregs: Vec, - /// For each safepoint entry in `safepoint_insns`, a list of `SpillSlot`s. - /// These are used to generate actual stack maps at emission. Filled in - /// post-regalloc. - safepoint_slots: Vec>, - - /// Do we generate debug info? - generate_debug_info: bool, - - /// Instruction end offsets, instruction indices at each label, - /// total buffer size, and start of cold code. Only present if - /// `generate_debug_info` is set. - insts_layout: RefCell, + /// A set with the same contents as `reftyped_vregs`, in order to + /// avoid inserting more than once. + reftyped_vregs_set: FxHashSet, /// Constants. constants: VCodeConstants, - /// Are any debug value-labels present? If not, we can skip the - /// post-emission analysis. - has_value_labels: bool, + /// Value labels for debuginfo attached to vregs. + debug_value_labels: Vec<(VReg, InsnIndex, InsnIndex, u32)>, } -#[derive(Debug, Default)] -pub(crate) struct InstsLayoutInfo { - pub(crate) inst_end_offsets: Vec, - pub(crate) label_inst_indices: Vec, - pub(crate) start_of_cold_code: Option, +/// The result of `VCode::emit`. 
Contains all information computed
+/// during emission: actual machine code, optionally a disassembly,
+/// and optionally metadata about the code layout.
+pub struct EmitResult<I: VCodeInst> {
+    /// The MachBuffer containing the machine code.
+    pub buffer: MachBuffer<I>,
+
+    /// Offset of each basic block, recorded during emission. Computed
+    /// only if `debug_value_labels` is non-empty.
+    pub bb_offsets: Vec<CodeOffset>,
+
+    /// Final basic-block edges, in terms of code offsets of
+    /// bb-starts. Computed only if `debug_value_labels` is non-empty.
+    pub bb_edges: Vec<(CodeOffset, CodeOffset)>,
+
+    /// Final instruction offsets, recorded during emission. Computed
+    /// only if `debug_value_labels` is non-empty.
+    pub inst_offsets: Vec<CodeOffset>,
+
+    /// Final length of function body.
+    pub func_body_len: CodeOffset,
+
+    /// The pretty-printed disassembly, if any. This uses the same
+    /// pretty-printing for MachInsts as the pre-regalloc VCode Debug
+    /// implementation, but additionally includes the prologue and
+    /// epilogue(s), and makes use of the regalloc results.
+    pub disasm: Option<String>,
+
+    /// Offsets of stackslots.
+    pub stackslot_offsets: PrimaryMap<StackSlot, u32>,
+
+    /// Value-labels information (debug metadata).
+    pub value_labels_ranges: ValueLabelsRanges,
+
+    /// Stack frame size.
+    pub frame_size: u32,
+}

-/// A builder for a VCode function body. This builder is designed for the
-/// lowering approach that we take: we traverse basic blocks in forward
-/// (original IR) order, but within each basic block, we generate code from
-/// bottom to top; and within each IR instruction that we visit in this reverse
-/// order, we emit machine instructions in *forward* order again.
+/// A builder for a VCode function body.
 ///
-/// Hence, to produce the final instructions in proper order, we perform two
-/// swaps. First, the machine instructions (`I` instances) are produced in
-/// forward order for an individual IR instruction. Then these are *reversed*
-/// and concatenated to `bb_insns` at the end of the IR instruction lowering.
-/// The `bb_insns` vec will thus contain all machine instructions for a basic
-/// block, in reverse order. Finally, when we're done with a basic block, we
-/// reverse the whole block's vec of instructions again, and concatenate onto
-/// the VCode's insts.
+/// This builder has the ability to accept instructions in either
+/// forward or reverse order, depending on the pass direction that
+/// produces the VCode. The lowering from CLIF to VCode
+/// ordinarily occurs in reverse order (in order to allow instructions
+/// to be lowered only if used, and not merged) so a reversal will
+/// occur at the end of lowering to ensure the VCode is in machine
+/// order.
+///
+/// If built in reverse, block and instruction indices used once the
+/// VCode is built are relative to the final (reversed) order, not the
+/// order of construction. Note that this means we do not know the
+/// final block or instruction indices when building, so we do not
+/// hand them out. (The user is assumed to know them when appending
+/// terminator instructions with successor blocks.)
 pub struct VCodeBuilder<I: VCodeInst> {
     /// In-progress VCode.
     vcode: VCode<I>,

-    /// In-progress stack map-request info.
-    stack_map_info: StackmapRequestInfo,
+    /// In what direction is the build occurring?
+    direction: VCodeBuildDirection,

     /// Index of the last block-start in the vcode.
-    block_start: InsnIndex,
+    block_start: usize,

     /// Start of succs for the current block in the concatenated succs list.
     succ_start: usize,

+    /// Start of blockparams for the current block in the concatenated
+    /// blockparams list.
+    block_params_start: usize,
+
+    /// Start of successor blockparam arg list entries in
+    /// the concatenated branch_block_arg_range list.
+    branch_block_arg_succ_start: usize,
+
     /// Current source location.
     cur_srcloc: SourceLoc,
+
+    /// Debug-value label in-progress map, keyed by label. For each
+    /// label, we keep disjoint ranges mapping to vregs. We'll flatten
+    /// this into (vreg, range, label) tuples when done.
+    debug_info: FxHashMap<ValueLabel, Vec<(InsnIndex, InsnIndex, VReg)>>,
+}
+
+/// Direction in which a VCodeBuilder builds VCode.
+#[derive(Clone, Copy, Debug, PartialEq, Eq)]
+pub enum VCodeBuildDirection {
+    // TODO: add `Forward` once we need it and can test it adequately.
+    /// Backward-build pass: we expect the producer to call `emit()`
+    /// with instructions in reverse program order within each block.
+    Backward,
+}

 impl<I: VCodeInst> VCodeBuilder<I> {
@@ -166,27 +282,19 @@ impl<I: VCodeInst> VCodeBuilder<I> {
         emit_info: I::Info,
         block_order: BlockLoweringOrder,
         constants: VCodeConstants,
+        direction: VCodeBuildDirection,
     ) -> VCodeBuilder<I> {
-        let reftype_class = I::ref_type_regclass(abi.flags());
-        let vcode = VCode::new(
-            abi,
-            emit_info,
-            block_order,
-            constants,
-            /* generate_debug_info = */ true,
-        );
-        let stack_map_info = StackmapRequestInfo {
-            reftype_class,
-            reftyped_vregs: vec![],
-            safepoint_insns: vec![],
-        };
+        let vcode = VCode::new(abi, emit_info, block_order, constants);

         VCodeBuilder {
             vcode,
-            stack_map_info,
+            direction,
             block_start: 0,
             succ_start: 0,
+            block_params_start: 0,
+            branch_block_arg_succ_start: 0,
             cur_srcloc: SourceLoc::default(),
+            debug_info: FxHashMap::default(),
         }
     }

@@ -202,18 +310,26 @@ impl<I: VCodeInst> VCodeBuilder<I> {
     /// Set the type of a VReg.
     pub fn set_vreg_type(&mut self, vreg: VirtualReg, ty: Type) {
-        if self.vcode.vreg_types.len() <= vreg.get_index() {
+        if self.vcode.vreg_types.len() <= vreg.index() {
             self.vcode
                 .vreg_types
-                .resize(vreg.get_index() + 1, ir::types::I8);
+                .resize(vreg.index() + 1, ir::types::I8);
         }
-        self.vcode.vreg_types[vreg.get_index()] = ty;
+        self.vcode.vreg_types[vreg.index()] = ty;
         if is_reftype(ty) {
-            self.stack_map_info.reftyped_vregs.push(vreg);
+            let vreg: VReg = vreg.into();
+            if self.vcode.reftyped_vregs_set.insert(vreg) {
+                self.vcode.reftyped_vregs.push(vreg);
+            }
             self.vcode.have_ref_values = true;
         }
     }

+    /// Get the type of a VReg.
+    pub fn get_vreg_type(&self, vreg: VirtualReg) -> Type {
+        self.vcode.vreg_types[vreg.index()]
+    }
+
     /// Set the current block as the entry block.
     pub fn set_entry(&mut self, block: BlockIndex) {
         self.vcode.entry = block;
@@ -223,47 +339,78 @@ impl<I: VCodeInst> VCodeBuilder<I> {
     /// for IR insts and prior to ending the function (building the VCode).
     pub fn end_bb(&mut self) {
         let start_idx = self.block_start;
-        let end_idx = self.vcode.insts.len() as InsnIndex;
+        let end_idx = self.vcode.insts.len();
         self.block_start = end_idx;
         // Add the instruction index range to the list of blocks.
-        self.vcode.block_ranges.push((start_idx, end_idx));
+        self.vcode
+            .block_ranges
+            .push((InsnIndex::new(start_idx), InsnIndex::new(end_idx)));
         // End the successors list.
-        let succ_end = self.vcode.block_succs.len();
+        let succ_end = self.vcode.block_succs_preds.len();
         self.vcode
             .block_succ_range
-            .push((self.succ_start, succ_end));
+            .push((self.succ_start as u32, succ_end as u32));
         self.succ_start = succ_end;
+        // End the blockparams list.
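(An editorial aside, not part of the patch: the blockparams bookkeeping announced by the comment above continues directly below. Every `(start, end)` pair pushed in `end_bb` uses the same flattened-range idiom: per-block tuples index into one concatenated vector, avoiding a Vec-of-Vecs. A generic sketch of the read side, with hypothetical names:)

fn entries_for<'a, T>(block: usize, ranges: &[(u32, u32)], flat: &'a [T]) -> &'a [T] {
    // `ranges[block]` is an inclusive-start, exclusive-end pair into `flat`.
    let (start, end) = ranges[block];
    &flat[start as usize..end as usize]
}

This layout keeps per-block metadata contiguous in memory, which is also why regalloc2 can iterate operands and successors cheaply.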
+        let block_params_end = self.vcode.block_params.len();
+        self.vcode
+            .block_params_range
+            .push((self.block_params_start as u32, block_params_end as u32));
+        self.block_params_start = block_params_end;
+        // End the branch blockparam args list.
+        let branch_block_arg_succ_end = self.vcode.branch_block_arg_range.len();
+        self.vcode.branch_block_arg_succ_range.push((
+            self.branch_block_arg_succ_start as u32,
+            branch_block_arg_succ_end as u32,
+        ));
+        self.branch_block_arg_succ_start = branch_block_arg_succ_end;
     }

-    /// Push an instruction for the current BB and current IR inst within the BB.
-    pub fn push(&mut self, insn: I, is_safepoint: bool) {
+    pub fn add_block_param(&mut self, param: VirtualReg, ty: Type) {
+        self.set_vreg_type(param, ty);
+        self.vcode.block_params.push(param.into());
+    }
+
+    pub fn add_branch_args_for_succ(&mut self, args: &[Reg]) {
+        let start = self.vcode.branch_block_args.len();
+        self.vcode
+            .branch_block_args
+            .extend(args.iter().map(|&arg| VReg::from(arg)));
+        let end = self.vcode.branch_block_args.len();
+        self.vcode
+            .branch_block_arg_range
+            .push((start as u32, end as u32));
+    }
+
+    /// Push an instruction for the current BB and current IR inst
+    /// within the BB.
+    pub fn push(&mut self, insn: I) {
         match insn.is_term() {
             MachTerminator::None | MachTerminator::Ret => {}
             MachTerminator::Uncond(target) => {
-                self.vcode.block_succs.push(BlockIx::new(target.get()));
+                self.vcode
+                    .block_succs_preds
+                    .push(BlockIndex::new(target.get() as usize));
             }
             MachTerminator::Cond(true_branch, false_branch) => {
-                self.vcode.block_succs.push(BlockIx::new(true_branch.get()));
                 self.vcode
-                    .block_succs
-                    .push(BlockIx::new(false_branch.get()));
+                    .block_succs_preds
+                    .push(BlockIndex::new(true_branch.get() as usize));
+                self.vcode
+                    .block_succs_preds
+                    .push(BlockIndex::new(false_branch.get() as usize));
             }
             MachTerminator::Indirect(targets) => {
                 for target in targets {
-                    self.vcode.block_succs.push(BlockIx::new(target.get()));
+                    self.vcode
+                        .block_succs_preds
+                        .push(BlockIndex::new(target.get() as usize));
                }
            }
        }
-        if insn.defines_value_label().is_some() {
-            self.vcode.has_value_labels = true;
-        }
+
         self.vcode.insts.push(insn);
         self.vcode.srclocs.push(self.cur_srcloc);
-        if is_safepoint {
-            self.stack_map_info
-                .safepoint_insns
-                .push(InstIx::new((self.vcode.insts.len() - 1) as u32));
-        }
     }

     /// Set the current source location.
@@ -271,26 +418,210 @@ impl<I: VCodeInst> VCodeBuilder<I> {
         self.cur_srcloc = srcloc;
     }

+    /// Add a debug value label to a register.
+    pub fn add_value_label(&mut self, reg: Reg, label: ValueLabel) {
+        // We'll fix up labels in reverse(). Because we're generating
+        // code bottom-to-top, the liverange of the label goes *from*
+        // the last index at which it was defined (or 0, which is the
+        // end of the eventual function) *to* just this instruction,
+        // and no further.
+        let inst = InsnIndex::new(self.vcode.insts.len());
+        let labels = self.debug_info.entry(label).or_insert_with(|| vec![]);
+        let last = labels
+            .last()
+            .map(|(_start, end, _vreg)| *end)
+            .unwrap_or(InsnIndex::new(0));
+        labels.push((last, inst, reg.into()));
+    }
+
+    pub fn set_vreg_alias(&mut self, from: Reg, to: Reg) {
+        let from = from.into();
+        let resolved_to = self.resolve_vreg_alias(to.into());
+        // Disallow cycles (see below).
+ assert_ne!(resolved_to, from); + self.vcode.vreg_aliases.insert(from, resolved_to); + } + + pub fn resolve_vreg_alias(&self, from: regalloc2::VReg) -> regalloc2::VReg { + Self::resolve_vreg_alias_impl(&self.vcode.vreg_aliases, from) + } + + fn resolve_vreg_alias_impl( + aliases: &FxHashMap, + from: regalloc2::VReg, + ) -> regalloc2::VReg { + // We prevent cycles from existing by resolving targets of + // aliases eagerly before setting them. If the target resolves + // to the origin of the alias, then a cycle would be created + // and the alias is disallowed. Because of the structure of + // SSA code (one instruction can refer to another's defs but + // not vice-versa, except indirectly through + // phis/blockparams), cycles should not occur as we use + // aliases to redirect vregs to the temps that actually define + // them. + + let mut vreg = from; + while let Some(to) = aliases.get(&vreg) { + vreg = *to; + } + vreg + } + /// Access the constants. pub fn constants(&mut self) -> &mut VCodeConstants { &mut self.vcode.constants } - /// Build the final VCode, returning the vcode itself as well as auxiliary - /// information, such as the stack map request information. - pub fn build(self) -> (VCode, StackmapRequestInfo) { - // TODO: come up with an abstraction for "vcode and auxiliary data". The - // auxiliary data needs to be separate from the vcode so that it can be - // referenced as the vcode is mutated (e.g. by the register allocator). - (self.vcode, self.stack_map_info) - } -} + fn compute_preds_from_succs(&mut self) { + // Compute predecessors from successors. In order to gather + // all preds for a block into a contiguous sequence, we build + // a list of (succ, pred) tuples and then sort. + let mut succ_pred_edges: Vec<(BlockIndex, BlockIndex)> = + Vec::with_capacity(self.vcode.block_succs_preds.len()); + for (pred, &(start, end)) in self.vcode.block_succ_range.iter().enumerate() { + let pred = BlockIndex::new(pred); + for i in start..end { + let succ = BlockIndex::new(self.vcode.block_succs_preds[i as usize].index()); + succ_pred_edges.push((succ, pred)); + } + } + succ_pred_edges.sort_unstable(); -fn is_redundant_move(insn: &I) -> bool { - if let Some((to, from)) = insn.is_move() { - to.to_reg() == from - } else { - false + let mut i = 0; + for succ in 0..self.vcode.num_blocks() { + let succ = BlockIndex::new(succ); + let start = self.vcode.block_succs_preds.len(); + while i < succ_pred_edges.len() && succ_pred_edges[i].0 == succ { + let pred = succ_pred_edges[i].1; + self.vcode.block_succs_preds.push(pred); + i += 1; + } + let end = self.vcode.block_succs_preds.len(); + self.vcode.block_pred_range.push((start as u32, end as u32)); + } + } + + /// Called once, when a build in Backward order is complete, to + /// perform the overall reversal (into final forward order) and + /// finalize metadata accordingly. + fn reverse_and_finalize(&mut self) { + let n_insts = self.vcode.insts.len(); + if n_insts == 0 { + return; + } + + // Reverse the per-block and per-inst sequences. + self.vcode.block_ranges.reverse(); + // block_params_range is indexed by block (and blocks were + // traversed in reverse) so we reverse it; but block-param + // sequences in the concatenated vec can remain in reverse + // order (it is effectively an arena of arbitrarily-placed + // referenced sequences). + self.vcode.block_params_range.reverse(); + // Likewise, we reverse block_succ_range, but the block_succ + // concatenated array can remain as-is. 
+ self.vcode.block_succ_range.reverse(); + self.vcode.insts.reverse(); + self.vcode.srclocs.reverse(); + // Likewise, branch_block_arg_succ_range is indexed by block + // so must be reversed. + self.vcode.branch_block_arg_succ_range.reverse(); + + // To translate an instruction index *endpoint* in reversed + // order to forward order, compute `n_insts - i`. + // + // Why not `n_insts - 1 - i`? That would be correct to + // translate an individual instruction index (for ten insts 0 + // to 9 inclusive, inst 0 becomes 9, and inst 9 becomes + // 0). But for the usual inclusive-start, exclusive-end range + // idiom, inclusive starts become exclusive ends and + // vice-versa, so e.g. an (inclusive) start of 0 becomes an + // (exclusive) end of 10. + let translate = |inst: InsnIndex| InsnIndex::new(n_insts - inst.index()); + + // Edit the block-range instruction indices. + for tuple in &mut self.vcode.block_ranges { + let (start, end) = *tuple; + *tuple = (translate(end), translate(start)); // Note reversed order. + } + + // Generate debug-value labels based on per-label maps. + for (label, tuples) in &self.debug_info { + for &(start, end, vreg) in tuples { + let vreg = self.resolve_vreg_alias(vreg); + let fwd_start = translate(end); + let fwd_end = translate(start); + self.vcode + .debug_value_labels + .push((vreg, fwd_start, fwd_end, label.as_u32())); + } + } + + // Now sort debug value labels by VReg, as required + // by regalloc2. + self.vcode + .debug_value_labels + .sort_unstable_by_key(|(vreg, _, _, _)| *vreg); + } + + fn collect_operands(&mut self) { + for (i, insn) in self.vcode.insts.iter().enumerate() { + // Push operands from the instruction onto the operand list. + // + // We rename through the vreg alias table as we collect + // the operands. This is better than a separate post-pass + // over operands, because it has more cache locality: + // operands only need to pass through L1 once. This is + // also better than renaming instructions' + // operands/registers while lowering, because here we only + // need to do the `match` over the instruction to visit + // its register fields (which is slow, branchy code) once. + + let vreg_aliases = &self.vcode.vreg_aliases; + let mut op_collector = OperandCollector::new(&mut self.vcode.operands, |vreg| { + Self::resolve_vreg_alias_impl(vreg_aliases, vreg) + }); + insn.get_operands(&mut op_collector); + let (ops, clobbers) = op_collector.finish(); + self.vcode.operand_ranges.push(ops); + + if !clobbers.is_empty() { + let start = self.vcode.clobbers.len(); + self.vcode.clobbers.extend(clobbers.into_iter()); + let end = self.vcode.clobbers.len(); + self.vcode + .clobber_ranges + .insert(InsnIndex::new(i), (start as u32, end as u32)); + } + + if let Some((dst, src)) = insn.is_move() { + let src = Operand::reg_use(Self::resolve_vreg_alias_impl(vreg_aliases, src.into())); + let dst = Operand::reg_def(Self::resolve_vreg_alias_impl( + vreg_aliases, + dst.to_reg().into(), + )); + // Note that regalloc2 requires these in (src, dst) order. + self.vcode.is_move.insert(InsnIndex::new(i), (src, dst)); + } + } + + // Translate blockparam args via the vreg aliases table as well. + for arg in &mut self.vcode.branch_block_args { + let new_arg = Self::resolve_vreg_alias_impl(&self.vcode.vreg_aliases, *arg); + log::trace!("operandcollector: block arg {:?} -> {:?}", arg, new_arg); + *arg = new_arg; + } + } + + /// Build the final VCode. 
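(A worked example for the endpoint translation explained above, not part of the patch: with `n_insts = 10`, a block occupying reversed indices `[2, 5)` maps to forward indices `[10 - 5, 10 - 2) = [5, 8)`. Inclusive starts and exclusive ends swap roles under reversal, which is exactly why no `- 1` appears.)

fn translate_range(n_insts: usize, rev: (usize, usize)) -> (usize, usize) {
    // Endpoints, not individual indices, translate as `n_insts - i`;
    // start and end swap because the order is reversed.
    (n_insts - rev.1, n_insts - rev.0)
}
// translate_range(10, (2, 5)) == (5, 8)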
+    pub fn build(mut self) -> VCode<I> {
+        if self.direction == VCodeBuildDirection::Backward {
+            self.reverse_and_finalize();
+        }
+        self.collect_operands();
+        self.compute_preds_from_succs();
+        self.vcode.debug_value_labels.sort_unstable();
+        self.vcode
+    }
 }

@@ -306,228 +637,222 @@ impl<I: VCodeInst> VCode<I> {
         emit_info: I::Info,
         block_order: BlockLoweringOrder,
         constants: VCodeConstants,
-        generate_debug_info: bool,
     ) -> VCode<I> {
+        let n_blocks = block_order.lowered_order().len();
         VCode {
-            liveins: abi.liveins(),
-            liveouts: abi.liveouts(),
             vreg_types: vec![],
             have_ref_values: false,
-            insts: vec![],
-            srclocs: vec![],
-            entry: 0,
-            block_ranges: vec![],
-            block_succ_range: vec![],
-            block_succs: vec![],
+            insts: Vec::with_capacity(10 * n_blocks),
+            operands: Vec::with_capacity(30 * n_blocks),
+            operand_ranges: Vec::with_capacity(10 * n_blocks),
+            clobber_ranges: FxHashMap::default(),
+            clobbers: vec![],
+            is_move: FxHashMap::default(),
+            srclocs: Vec::with_capacity(10 * n_blocks),
+            entry: BlockIndex::new(0),
+            block_ranges: Vec::with_capacity(n_blocks),
+            block_succ_range: Vec::with_capacity(n_blocks),
+            block_succs_preds: Vec::with_capacity(2 * n_blocks),
+            block_pred_range: Vec::with_capacity(n_blocks),
+            block_params_range: Vec::with_capacity(n_blocks),
+            block_params: Vec::with_capacity(5 * n_blocks),
+            branch_block_args: Vec::with_capacity(10 * n_blocks),
+            branch_block_arg_range: Vec::with_capacity(2 * n_blocks),
+            branch_block_arg_succ_range: Vec::with_capacity(n_blocks),
             block_order,
             abi,
             emit_info,
-            safepoint_insns: vec![],
-            safepoint_slots: vec![],
-            generate_debug_info,
-            insts_layout: RefCell::new(Default::default()),
+            reftyped_vregs: vec![],
+            reftyped_vregs_set: FxHashSet::default(),
             constants,
-            has_value_labels: false,
+            debug_value_labels: vec![],
+            vreg_aliases: FxHashMap::with_capacity_and_hasher(10 * n_blocks, Default::default()),
         }
     }

-    /// Returns the flags controlling this function's compilation.
-    pub fn flags(&self) -> &settings::Flags {
-        self.abi.flags()
-    }
-
-    /// Get the IR-level type of a VReg.
-    pub fn vreg_type(&self, vreg: VirtualReg) -> Type {
-        self.vreg_types[vreg.get_index()]
-    }
-
     /// Get the number of blocks. Block indices will be in the range `0 ..
     /// (self.num_blocks() - 1)`.
     pub fn num_blocks(&self) -> usize {
         self.block_ranges.len()
     }

-    /// Stack frame size for the full function's body.
-    pub fn frame_size(&self) -> u32 {
-        self.abi.frame_size()
-    }
-
     /// Get the successors for a block.
-    pub fn succs(&self, block: BlockIndex) -> &[BlockIx] {
-        let (start, end) = self.block_succ_range[block as usize];
-        &self.block_succs[start..end]
+    pub fn succs(&self, block: BlockIndex) -> &[BlockIndex] {
+        let (start, end) = self.block_succ_range[block.index()];
+        &self.block_succs_preds[start as usize..end as usize]
     }

-    /// Take the results of register allocation, with a sequence of
-    /// instructions including spliced fill/reload/move instructions, and replace
-    /// the VCode with them.
-    pub fn replace_insns_from_regalloc(&mut self, result: RegAllocResult<Self>) {
-        // Record the spillslot count and clobbered registers for the ABI/stack
-        // setup code.
-        self.abi.set_num_spillslots(result.num_spill_slots as usize);
-        self.abi
-            .set_clobbered(result.clobbered_registers.map(|r| Writable::from_reg(*r)));
+    fn compute_clobbers(&self, regalloc: &regalloc2::Output) -> Vec<Writable<RealReg>> {
+        // Compute clobbered registers.
+ let mut clobbered = vec![]; + let mut clobbered_set = FxHashSet::default(); - let mut final_insns = vec![]; - let mut final_block_ranges = vec![(0, 0); self.num_blocks()]; - let mut final_srclocs = vec![]; - let mut final_safepoint_insns = vec![]; - let mut safept_idx = 0; - - assert!(result.target_map.elems().len() == self.num_blocks()); - for block in 0..self.num_blocks() { - let start = result.target_map.elems()[block].get() as usize; - let end = if block == self.num_blocks() - 1 { - result.insns.len() - } else { - result.target_map.elems()[block + 1].get() as usize - }; - let block = block as BlockIndex; - let final_start = final_insns.len() as InsnIndex; - - if block == self.entry { - // Start with the prologue. - let prologue = self.abi.gen_prologue(); - let len = prologue.len(); - final_insns.extend(prologue.into_iter()); - final_srclocs.extend(iter::repeat(SourceLoc::default()).take(len)); - } - - for i in start..end { - let insn = &result.insns[i]; - - // Elide redundant moves at this point (we only know what is - // redundant once registers are allocated). - if is_redundant_move(insn) { - continue; - } - - // Is there a srcloc associated with this insn? Look it up based on original - // instruction index (if new insn corresponds to some original insn, i.e., is not - // an inserted load/spill/move). - let orig_iix = result.orig_insn_map[InstIx::new(i as u32)]; - let srcloc = if orig_iix.is_invalid() { - SourceLoc::default() - } else { - self.srclocs[orig_iix.get() as usize] - }; - - // Whenever encountering a return instruction, replace it - // with the epilogue. - let is_ret = insn.is_term() == MachTerminator::Ret; - if is_ret { - let epilogue = self.abi.gen_epilogue(); - let len = epilogue.len(); - final_insns.extend(epilogue.into_iter()); - final_srclocs.extend(iter::repeat(srcloc).take(len)); - } else { - final_insns.push(insn.clone()); - final_srclocs.push(srcloc); - } - - // Was this instruction a safepoint instruction? Add its final - // index to the safepoint insn-index list if so. - if safept_idx < result.new_safepoint_insns.len() - && (result.new_safepoint_insns[safept_idx].get() as usize) == i - { - let idx = final_insns.len() - 1; - final_safepoint_insns.push(idx as InsnIndex); - safept_idx += 1; + // All moves are included in clobbers. + for edit in ®alloc.edits { + let Edit::Move { to, .. } = edit.1; + if let Some(preg) = to.as_reg() { + let reg = RealReg::from(preg); + if clobbered_set.insert(reg) { + clobbered.push(Writable::from_reg(reg)); } } - - let final_end = final_insns.len() as InsnIndex; - final_block_ranges[block as usize] = (final_start, final_end); } - debug_assert!(final_insns.len() == final_srclocs.len()); + for (i, (start, end)) in self.operand_ranges.iter().enumerate() { + // Skip this instruction if not "included in clobbers" as + // per the MachInst. (Some backends use this to implement + // ABI specifics; e.g., excluding calls of the same ABI as + // the current function from clobbers, because by + // definition everything clobbered by the call can be + // clobbered by this function without saving as well.) 
+            if !self.insts[i].is_included_in_clobbers() {
+                continue;
+            }

-        self.insts = final_insns;
-        self.srclocs = final_srclocs;
-        self.block_ranges = final_block_ranges;
-        self.safepoint_insns = final_safepoint_insns;
+            let start = *start as usize;
+            let end = *end as usize;
+            let operands = &self.operands[start..end];
+            let allocs = &regalloc.allocs[start..end];
+            for (operand, alloc) in operands.iter().zip(allocs.iter()) {
+                // We're interested only in writes (Mods or Defs).
+                if operand.kind() == OperandKind::Use {
+                    continue;
+                }
+                if let Some(preg) = alloc.as_reg() {
+                    let reg = RealReg::from(preg);
+                    if clobbered_set.insert(reg) {
+                        clobbered.push(Writable::from_reg(reg));
+                    }
+                }
+            }

-        // Save safepoint slot-lists. These will be passed to the `EmitState`
-        // for the machine backend during emission so that it can do
-        // target-specific translations of slot numbers to stack offsets.
-        self.safepoint_slots = result.stackmaps;
+            // Also add explicitly-clobbered registers.
+            if let Some(&(start, end)) = self.clobber_ranges.get(&InsnIndex::new(i)) {
+                let inst_clobbers = &self.clobbers[(start as usize)..(end as usize)];
+                for &preg in inst_clobbers {
+                    let reg = RealReg::from(preg);
+                    if clobbered_set.insert(reg) {
+                        clobbered.push(Writable::from_reg(reg));
+                    }
+                }
+            }
+        }
+
+        clobbered
     }

-    /// Emit the instructions to a `MachBuffer`, containing fixed-up code and external
-    /// reloc/trap/etc. records ready for use.
+    /// Emit the instructions to a `MachBuffer`, containing fixed-up
+    /// code and external reloc/trap/etc. records ready for use. Takes
+    /// the regalloc results as well.
+    ///
+    /// Returns the machine code itself, and optionally metadata
+    /// and/or a disassembly, as an `EmitResult`. The `VCode` itself
+    /// is consumed by the emission process.
     pub fn emit(
-        &self,
-    ) -> (
-        MachBuffer<I>,
-        Vec<CodeOffset>,
-        Vec<(CodeOffset, CodeOffset)>,
-    )
+        mut self,
+        regalloc: &regalloc2::Output,
+        want_disasm: bool,
+        want_metadata: bool,
+    ) -> EmitResult<I>
     where
         I: MachInstEmit,
     {
+        // To write into disasm string.
+        use core::fmt::Write;
+
         let _tt = timing::vcode_emit();
         let mut buffer = MachBuffer::new();
-        let mut state = I::State::new(&*self.abi);
-        let cfg_metadata = self.flags().machine_code_cfg_info();
         let mut bb_starts: Vec<Option<CodeOffset>> = vec![];

         // The first M MachLabels are reserved for block indices, the next N MachLabels for
         // constants.
-        buffer.reserve_labels_for_blocks(self.num_blocks() as BlockIndex);
+        buffer.reserve_labels_for_blocks(self.num_blocks());
         buffer.reserve_labels_for_constants(&self.constants);

-        let mut inst_end_offsets = vec![0; self.insts.len()];
-        let mut label_inst_indices = vec![0; self.num_blocks()];
-
-        // Map from instruction index to index in
-        // `safepoint_slots`. We need this because we emit
-        // instructions out-of-order, while the safepoint_insns /
-        // safepoint_slots data structures are sorted in instruction
-        // order.
-        let mut safepoint_indices: FxHashMap<InsnIndex, usize> = FxHashMap::default();
-        for (safepoint_idx, iix) in self.safepoint_insns.iter().enumerate() {
-            // Disregard safepoints that ended up having no live refs.
-            if self.safepoint_slots[safepoint_idx].len() > 0 {
-                safepoint_indices.insert(*iix, safepoint_idx);
-            }
-        }
-
         // Construct the final order we emit code in: cold blocks at the end.
let mut final_order: SmallVec<[BlockIndex; 16]> = smallvec![]; let mut cold_blocks: SmallVec<[BlockIndex; 16]> = smallvec![]; for block in 0..self.num_blocks() { - let block = block as BlockIndex; + let block = BlockIndex::new(block); if self.block_order.is_cold(block) { cold_blocks.push(block); } else { final_order.push(block); } } - let first_cold_block = cold_blocks.first().cloned(); final_order.extend(cold_blocks.clone()); + // Compute/save info we need for the prologue: clobbers and + // number of spillslots. + // + // We clone `abi` here because we will mutate it as we + // generate the prologue and set other info, but we can't + // mutate `VCode`. The info it usually carries prior to + // setting clobbers is fairly minimal so this should be + // relatively cheap. + let clobbers = self.compute_clobbers(regalloc); + self.abi.set_num_spillslots(regalloc.num_spillslots); + self.abi.set_clobbered(clobbers); + + // We need to generate the prologue in order to get the ABI + // object into the right state first. We'll emit it when we + // hit the right block below. + let prologue_insts = self.abi.gen_prologue(); + // Emit blocks. let mut cur_srcloc = None; let mut last_offset = None; - let mut start_of_cold_code = None; + let mut inst_offsets = vec![]; + let mut state = I::State::new(&*self.abi); + + let mut disasm = String::new(); + + if !self.debug_value_labels.is_empty() { + inst_offsets.resize(self.insts.len(), 0); + } + for block in final_order { + log::trace!("emitting block {:?}", block); let new_offset = I::align_basic_block(buffer.cur_offset()); while new_offset > buffer.cur_offset() { // Pad with NOPs up to the aligned block offset. let nop = I::gen_nop((new_offset - buffer.cur_offset()) as usize); - nop.emit(&mut buffer, &self.emit_info, &mut Default::default()); + nop.emit(&[], &mut buffer, &self.emit_info, &mut Default::default()); } assert_eq!(buffer.cur_offset(), new_offset); - if Some(block) == first_cold_block { - start_of_cold_code = Some(buffer.cur_offset()); + let do_emit = |inst: &I, + allocs: &[Allocation], + disasm: &mut String, + buffer: &mut MachBuffer, + state: &mut I::State| { + if want_disasm { + let mut s = state.clone(); + writeln!(disasm, " {}", inst.pretty_print_inst(allocs, &mut s)).unwrap(); + } + inst.emit(allocs, buffer, &self.emit_info, state); + }; + + // Is this the first block? Emit the prologue directly if so. + if block == self.entry { + log::trace!(" -> entry block"); + buffer.start_srcloc(SourceLoc::default()); + state.pre_sourceloc(SourceLoc::default()); + for inst in &prologue_insts { + do_emit(&inst, &[], &mut disasm, &mut buffer, &mut state); + } + buffer.end_srcloc(); } - let (start, end) = self.block_ranges[block as usize]; - buffer.bind_label(MachLabel::from_block(block)); - label_inst_indices[block as usize] = start; + // Now emit the regular block body. - if cfg_metadata { + buffer.bind_label(MachLabel::from_block(block)); + + if want_disasm { + writeln!(&mut disasm, "block{}:", block.index()).unwrap(); + } + + if want_metadata { // Track BB starts. If we have backed up due to MachBuffer // branch opts, note that the removed blocks were removed. 
let cur_offset = buffer.cur_offset(); @@ -543,38 +868,139 @@ impl VCode { last_offset = Some(cur_offset); } - for iix in start..end { - let srcloc = self.srclocs[iix as usize]; - if cur_srcloc != Some(srcloc) { - if cur_srcloc.is_some() { - buffer.end_srcloc(); - } - buffer.start_srcloc(srcloc); - cur_srcloc = Some(srcloc); - } - state.pre_sourceloc(cur_srcloc.unwrap_or(SourceLoc::default())); + for inst_or_edit in regalloc.block_insts_and_edits(&self, block) { + match inst_or_edit { + InstOrEdit::Inst(iix) => { + if !self.debug_value_labels.is_empty() { + // If we need to produce debug info, + // record the offset of each instruction + // so that we can translate value-label + // ranges to machine-code offsets. - if let Some(safepoint_idx) = safepoint_indices.get(&iix) { - let stack_map = self - .abi - .spillslots_to_stack_map(&self.safepoint_slots[*safepoint_idx][..], &state); - state.pre_safepoint(stack_map); - } + // Cold blocks violate monotonicity + // assumptions elsewhere (that + // instructions in inst-index order are in + // order in machine code), so we omit + // their offsets here. Value-label range + // generation below will skip empty ranges + // and ranges with to-offsets of zero. + if !self.block_order.is_cold(block) { + inst_offsets[iix.index()] = buffer.cur_offset(); + } + } - self.insts[iix as usize].emit(&mut buffer, &self.emit_info, &mut state); + if self.insts[iix.index()].is_move().is_some() { + // Skip moves in the pre-regalloc program; + // all of these are incorporated by the + // regalloc into its unified move handling + // and they come out the other end, if + // still needed (not elided), as + // regalloc-inserted moves. + continue; + } - if self.generate_debug_info { - // Buffer truncation may have happened since last inst append; trim inst-end - // layout info as appropriate. - let l = &mut inst_end_offsets[0..iix as usize]; - for end in l.iter_mut().rev() { - if *end > buffer.cur_offset() { - *end = buffer.cur_offset(); + // Update the srcloc at this point in the buffer. + let srcloc = self.srclocs[iix.index()]; + if cur_srcloc != Some(srcloc) { + if cur_srcloc.is_some() { + buffer.end_srcloc(); + } + buffer.start_srcloc(srcloc); + cur_srcloc = Some(srcloc); + } + state.pre_sourceloc(cur_srcloc.unwrap_or(SourceLoc::default())); + + // If this is a safepoint, compute a stack map + // and pass it to the emit state. + if self.insts[iix.index()].is_safepoint() { + let mut safepoint_slots: SmallVec<[SpillSlot; 8]> = smallvec![]; + // Find the contiguous range of + // (progpoint, allocation) safepoint slot + // records in `regalloc.safepoint_slots` + // for this instruction index. + let safepoint_slots_start = regalloc + .safepoint_slots + .binary_search_by(|(progpoint, _alloc)| { + if progpoint.inst() >= iix { + std::cmp::Ordering::Greater + } else { + std::cmp::Ordering::Less + } + }) + .unwrap_err(); + + for (_, alloc) in regalloc.safepoint_slots[safepoint_slots_start..] + .iter() + .take_while(|(progpoint, _)| progpoint.inst() == iix) + { + let slot = alloc.as_stack().unwrap(); + safepoint_slots.push(slot); + } + if !safepoint_slots.is_empty() { + let stack_map = self + .abi + .spillslots_to_stack_map(&safepoint_slots[..], &state); + state.pre_safepoint(stack_map); + } + } + + // Get the allocations for this inst from the regalloc result. + let allocs = regalloc.inst_allocs(iix); + + // If the instruction we are about to emit is + // a return, place an epilogue at this point + // (and don't emit the return; the actual + // epilogue will contain it). 
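(An editorial aside on the `binary_search_by` used for safepoint slots above, not part of the patch; the return/epilogue handling continues below. The comparator never returns `Equal`, so the search always "fails", and the `Err` payload is the partition point: the first index whose key is `>=` the probe, i.e. the start of the sorted, contiguous run being sought. A standalone sketch of the idiom:)

fn first_index_geq(keys: &[u32], probe: u32) -> usize {
    keys.binary_search_by(|&k| {
        if k >= probe {
            std::cmp::Ordering::Greater
        } else {
            std::cmp::Ordering::Less
        }
    })
    .unwrap_err()
}
// first_index_geq(&[1, 3, 3, 5], 3) == 1; all entries with key 3 then
// follow contiguously, mirroring the per-instruction safepoint-slot scan.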
+ if self.insts[iix.index()].is_term() == MachTerminator::Ret { + for inst in self.abi.gen_epilogue() { + do_emit(&inst, &[], &mut disasm, &mut buffer, &mut state); + } } else { - break; + // Emit the instruction! + do_emit( + &self.insts[iix.index()], + allocs, + &mut disasm, + &mut buffer, + &mut state, + ); + } + } + + InstOrEdit::Edit(Edit::Move { from, to }) => { + // Create a move/spill/reload instruction and + // immediately emit it. + match (from.as_reg(), to.as_reg()) { + (Some(from), Some(to)) => { + // Reg-to-reg move. + let from_rreg = Reg::from(from); + let to_rreg = Writable::from_reg(Reg::from(to)); + debug_assert_eq!(from.class(), to.class()); + let ty = I::canonical_type_for_rc(from.class()); + let mv = I::gen_move(to_rreg, from_rreg, ty); + do_emit(&mv, &[], &mut disasm, &mut buffer, &mut state); + } + (Some(from), None) => { + // Spill from register to spillslot. + let to = to.as_stack().unwrap(); + let from_rreg = RealReg::from(from); + debug_assert_eq!(from.class(), to.class()); + let spill = self.abi.gen_spill(to, from_rreg); + do_emit(&spill, &[], &mut disasm, &mut buffer, &mut state); + } + (None, Some(to)) => { + // Load from spillslot to register. + let from = from.as_stack().unwrap(); + let to_rreg = Writable::from_reg(RealReg::from(to)); + debug_assert_eq!(from.class(), to.class()); + let reload = self.abi.gen_reload(to_rreg, from); + do_emit(&reload, &[], &mut disasm, &mut buffer, &mut state); + } + (None, None) => { + panic!("regalloc2 should have eliminated stack-to-stack moves!"); + } } } - inst_end_offsets[iix as usize] = buffer.cur_offset(); } } @@ -583,13 +1009,14 @@ impl VCode { cur_srcloc = None; } - // Do we need an island? Get the worst-case size of the next BB and see if, having - // emitted that many bytes, we will be beyond the deadline. - if block < (self.num_blocks() - 1) as BlockIndex { - let next_block = block + 1; - let next_block_range = self.block_ranges[next_block as usize]; - let next_block_size = next_block_range.1 - next_block_range.0; - let worst_case_next_bb = I::worst_case_size() * next_block_size; + // Do we need an island? Get the worst-case size of the + // next BB and see if, having emitted that many bytes, we + // will be beyond the deadline. + if block.index() < (self.num_blocks() - 1) { + let next_block = block.index() + 1; + let next_block_range = self.block_ranges[next_block]; + let next_block_size = next_block_range.1.index() - next_block_range.0.index(); + let worst_case_next_bb = I::worst_case_size() * next_block_size as u32; if buffer.island_needed(worst_case_next_bb) { buffer.emit_island(worst_case_next_bb); } @@ -602,25 +1029,12 @@ impl VCode { buffer.defer_constant(label, data.alignment(), data.as_slice(), u32::max_value()); } - if self.generate_debug_info { - for end in inst_end_offsets.iter_mut().rev() { - if *end > buffer.cur_offset() { - *end = buffer.cur_offset(); - } else { - break; - } - } - *self.insts_layout.borrow_mut() = InstsLayoutInfo { - inst_end_offsets, - label_inst_indices, - start_of_cold_code, - }; - } + let func_body_len = buffer.cur_offset(); // Create `bb_edges` and final (filtered) `bb_starts`. - let mut final_bb_starts = vec![]; let mut bb_edges = vec![]; - if cfg_metadata { + let mut bb_offsets = vec![]; + if want_metadata { for block in 0..self.num_blocks() { if bb_starts[block].is_none() { // Block was deleted by MachBuffer; skip. 
@@ -628,157 +1042,248 @@ impl VCode { } let from = bb_starts[block].unwrap(); - final_bb_starts.push(from); + bb_offsets.push(from); // Resolve each `succ` label and add edges. - let succs = self.block_succs(BlockIx::new(block as u32)); - for succ in succs.iter() { - let to = buffer.resolve_label_offset(MachLabel::from_block(succ.get())); + let succs = self.block_succs(BlockIndex::new(block)); + for &succ in succs.iter() { + let to = buffer.resolve_label_offset(MachLabel::from_block(succ)); bb_edges.push((from, to)); } } } - (buffer, final_bb_starts, bb_edges) + let value_labels_ranges = + self.compute_value_labels_ranges(regalloc, &inst_offsets[..], func_body_len); + let frame_size = self.abi.frame_size(); + + EmitResult { + buffer, + bb_offsets, + bb_edges, + inst_offsets, + func_body_len, + disasm: if want_disasm { Some(disasm) } else { None }, + stackslot_offsets: self.abi.stackslot_offsets().clone(), + value_labels_ranges, + frame_size, + } } - /// Generates value-label ranges. - pub fn value_labels_ranges(&self) -> ValueLabelsRanges { - if !self.has_value_labels { + fn compute_value_labels_ranges( + &self, + regalloc: ®alloc2::Output, + inst_offsets: &[CodeOffset], + func_body_len: u32, + ) -> ValueLabelsRanges { + if self.debug_value_labels.is_empty() { return ValueLabelsRanges::default(); } - let layout_info = &self.insts_layout.borrow(); - debug::compute(&self.insts, &*layout_info) - } + let mut value_labels_ranges: ValueLabelsRanges = HashMap::new(); + for &(label, from, to, alloc) in ®alloc.debug_locations { + let ranges = value_labels_ranges + .entry(ValueLabel::from_u32(label)) + .or_insert_with(|| vec![]); + let from_offset = inst_offsets[from.inst().index()]; + let to_offset = if to.inst().index() == inst_offsets.len() { + func_body_len + } else { + inst_offsets[to.inst().index()] + }; - /// Get the offsets of stackslots. - pub fn stackslot_offsets(&self) -> &PrimaryMap { - self.abi.stackslot_offsets() + // Empty range or to-offset of zero can happen because of + // cold blocks (see above). + if to_offset == 0 || from_offset == to_offset { + continue; + } + + let loc = if let Some(preg) = alloc.as_reg() { + LabelValueLoc::Reg(Reg::from(preg)) + } else { + // We can't translate spillslot locations at the + // moment because ValueLabelLoc requires an + // instantaneous SP offset, and this can *change* + // within the range we have here because of callsites + // adjusting SP temporarily. To avoid the complexity + // of accurately plumbing through nominal-SP + // adjustment sites, we just omit debug info for + // values that are spilled. Not ideal, but debug info + // is best-effort. + continue; + }; + + ranges.push(ValueLocRange { + loc, + // ValueLocRanges are recorded by *instruction-end + // offset*. `from_offset` is the *start* of the + // instruction; that is the same as the end of another + // instruction, so we only want to begin coverage once + // we are past the previous instruction's end. + start: from_offset + 1, + // Likewise, `end` is exclusive, but we want to + // *include* the end of the last + // instruction. `to_offset` is the start of the + // `to`-instruction, which is the exclusive end, i.e., + // the first instruction not covered. That + // instruction's start is the same as the end of the + // last instruction that is included, so we go one + // byte further to be sure to include it. + end: to_offset + 1, + }); + } + + value_labels_ranges } /// Get the IR block for a BlockIndex, if one exists. 
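(A worked example for the `+ 1` endpoint convention above, not part of the patch; the offsets are invented. If the covered range begins at the instruction starting at offset 0x10 and ends before the instruction starting at 0x14, the recorded range is `(0x11, 0x15)`: coverage begins just past the previous instruction's end offset, 0x10, and runs one byte past 0x14 so that the last covered instruction's own end offset is included.)

fn loc_range_endpoints(from_offset: u32, to_offset: u32) -> (u32, u32) {
    // Ranges are recorded by instruction-end offset; see the
    // `start`/`end` comments above.
    (from_offset + 1, to_offset + 1)
}
// loc_range_endpoints(0x10, 0x14) == (0x11, 0x15)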
pub fn bindex_to_bb(&self, block: BlockIndex) -> Option { - self.block_order.lowered_order()[block as usize].orig_block() + self.block_order.lowered_order()[block.index()].orig_block() } } impl RegallocFunction for VCode { - type Inst = I; - - fn insns(&self) -> &[I] { - &self.insts[..] + fn num_insts(&self) -> usize { + self.insts.len() } - fn insns_mut(&mut self) -> &mut [I] { - &mut self.insts[..] + fn num_blocks(&self) -> usize { + self.block_ranges.len() } - fn get_insn(&self, insn: InstIx) -> &I { - &self.insts[insn.get() as usize] + fn entry_block(&self) -> BlockIndex { + self.entry } - fn get_insn_mut(&mut self, insn: InstIx) -> &mut I { - &mut self.insts[insn.get() as usize] + fn block_insns(&self, block: BlockIndex) -> InstRange { + let (start, end) = self.block_ranges[block.index()]; + InstRange::forward(start, end) } - fn blocks(&self) -> Range { - Range::new(BlockIx::new(0), self.block_ranges.len()) + fn block_succs(&self, block: BlockIndex) -> &[BlockIndex] { + let (start, end) = self.block_succ_range[block.index()]; + &self.block_succs_preds[start as usize..end as usize] } - fn entry_block(&self) -> BlockIx { - BlockIx::new(self.entry) + fn block_preds(&self, block: BlockIndex) -> &[BlockIndex] { + let (start, end) = self.block_pred_range[block.index()]; + &self.block_succs_preds[start as usize..end as usize] } - fn block_insns(&self, block: BlockIx) -> Range { - let (start, end) = self.block_ranges[block.get() as usize]; - Range::new(InstIx::new(start), (end - start) as usize) + fn block_params(&self, block: BlockIndex) -> &[VReg] { + let (start, end) = self.block_params_range[block.index()]; + &self.block_params[start as usize..end as usize] } - fn block_succs(&self, block: BlockIx) -> Cow<[BlockIx]> { - let (start, end) = self.block_succ_range[block.get() as usize]; - Cow::Borrowed(&self.block_succs[start..end]) + fn branch_blockparams(&self, block: BlockIndex, _insn: InsnIndex, succ_idx: usize) -> &[VReg] { + let (succ_range_start, succ_range_end) = self.branch_block_arg_succ_range[block.index()]; + let succ_ranges = + &self.branch_block_arg_range[succ_range_start as usize..succ_range_end as usize]; + let (branch_block_args_start, branch_block_args_end) = succ_ranges[succ_idx]; + &self.branch_block_args[branch_block_args_start as usize..branch_block_args_end as usize] } - fn is_ret(&self, insn: InstIx) -> bool { - match self.insts[insn.get() as usize].is_term() { + fn is_ret(&self, insn: InsnIndex) -> bool { + match self.insts[insn.index()].is_term() { MachTerminator::Ret => true, _ => false, } } - fn is_included_in_clobbers(&self, insn: &I) -> bool { - insn.is_included_in_clobbers() + fn is_branch(&self, insn: InsnIndex) -> bool { + match self.insts[insn.index()].is_term() { + MachTerminator::Cond(..) + | MachTerminator::Uncond(..) + | MachTerminator::Indirect(..) 
=> true,
+            _ => false,
+        }
+    }

-    fn get_regs(insn: &I, collector: &mut RegUsageCollector) {
-        insn.get_regs(collector)
+    fn requires_refs_on_stack(&self, insn: InsnIndex) -> bool {
+        self.insts[insn.index()].is_safepoint()
     }

-    fn map_regs<RUM: RegUsageMapper>(insn: &mut I, mapper: &RUM) {
-        insn.map_regs(mapper);
+    fn is_move(&self, insn: InsnIndex) -> Option<(Operand, Operand)> {
+        self.is_move.get(&insn).cloned()
     }

-    fn is_move(&self, insn: &I) -> Option<(Writable<Reg>, Reg)> {
-        insn.is_move()
+    fn inst_operands(&self, insn: InsnIndex) -> &[Operand] {
+        let (start, end) = self.operand_ranges[insn.index()];
+        &self.operands[start as usize..end as usize]
     }

-    fn get_num_vregs(&self) -> usize {
-        self.vreg_types.len()
+    fn inst_clobbers(&self, insn: InsnIndex) -> &[PReg] {
+        if let Some(&(start, end)) = self.clobber_ranges.get(&insn) {
+            &self.clobbers[start as usize..end as usize]
+        } else {
+            &[]
+        }
     }

-    fn get_spillslot_size(&self, regclass: RegClass, _: VirtualReg) -> u32 {
-        self.abi.get_spillslot_size(regclass)
+    fn num_vregs(&self) -> usize {
+        std::cmp::max(self.vreg_types.len(), first_user_vreg_index())
     }

-    fn gen_spill(&self, to_slot: SpillSlot, from_reg: RealReg, _: Option<VirtualReg>) -> I {
-        self.abi.gen_spill(to_slot, from_reg)
+    fn reftype_vregs(&self) -> &[VReg] {
+        &self.reftyped_vregs[..]
     }

-    fn gen_reload(
-        &self,
-        to_reg: Writable<RealReg>,
-        from_slot: SpillSlot,
-        _: Option<VirtualReg>,
-    ) -> I {
-        self.abi.gen_reload(to_reg, from_slot)
+    fn debug_value_labels(&self) -> &[(VReg, InsnIndex, InsnIndex, u32)] {
+        &self.debug_value_labels[..]
     }

-    fn gen_move(&self, to_reg: Writable<RealReg>, from_reg: RealReg, vreg: VirtualReg) -> I {
-        let ty = self.vreg_type(vreg);
-        I::gen_move(to_reg.map(|r| r.to_reg()), from_reg.to_reg(), ty)
+    fn is_pinned_vreg(&self, vreg: VReg) -> Option<PReg> {
+        pinned_vreg_to_preg(vreg)
     }

-    fn gen_zero_len_nop(&self) -> I {
-        I::gen_nop(0)
+    fn spillslot_size(&self, regclass: RegClass) -> usize {
+        self.abi.get_spillslot_size(regclass) as usize
     }

-    fn maybe_direct_reload(&self, insn: &I, reg: VirtualReg, slot: SpillSlot) -> Option<I> {
-        insn.maybe_direct_reload(reg, slot)
-    }
-
-    fn func_liveins(&self) -> RegallocSet<RealReg> {
-        self.liveins.clone()
-    }
-
-    fn func_liveouts(&self) -> RegallocSet<RealReg> {
-        self.liveouts.clone()
+    fn allow_multiple_vreg_defs(&self) -> bool {
+        // At least the s390x backend requires this, because the
+        // `Loop` pseudo-instruction aggregates all Operands so pinned
+        // vregs (RealRegs) may occur more than once.
+        true
     }
 }

 impl<I: VCodeInst> fmt::Debug for VCode<I> {
     fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
-        writeln!(f, "VCode_Debug {{")?;
-        writeln!(f, "  Entry block: {}", self.entry)?;
+        writeln!(f, "VCode {{")?;
+        writeln!(f, "  Entry block: {}", self.entry.index())?;
+
+        let mut state = Default::default();
+
+        let mut alias_keys = self.vreg_aliases.keys().cloned().collect::<Vec<_>>();
+        alias_keys.sort_unstable();
+        for key in alias_keys {
+            let dest = self.vreg_aliases.get(&key).unwrap();
+            writeln!(f, "  {:?} := {:?}", Reg::from(key), Reg::from(*dest))?;
+        }

         for block in 0..self.num_blocks() {
-            writeln!(f, "Block {}:", block,)?;
-            for succ in self.succs(block as BlockIndex) {
-                writeln!(f, "  (successor: Block {})", succ.get())?;
+            let block = BlockIndex::new(block);
+            writeln!(f, "Block {}:", block.index())?;
+            if let Some(bb) = self.bindex_to_bb(block) {
+                writeln!(f, "    (original IR block: {})", bb)?;
             }
-            let (start, end) = self.block_ranges[block];
-            writeln!(f, "  (instruction range: {} ..
{})", start, end)?; - for inst in start..end { - writeln!(f, " Inst {}: {:?}", inst, self.insts[inst as usize])?; + for succ in self.succs(block) { + writeln!(f, " (successor: Block {})", succ.index())?; + } + let (start, end) = self.block_ranges[block.index()]; + writeln!( + f, + " (instruction range: {} .. {})", + start.index(), + end.index() + )?; + for inst in start.index()..end.index() { + writeln!( + f, + " Inst {}: {}", + inst, + self.insts[inst].pretty_print_inst(&[], &mut state) + )?; } } @@ -787,57 +1292,6 @@ impl fmt::Debug for VCode { } } -/// Pretty-printing with `RealRegUniverse` context. -impl PrettyPrint for VCode { - fn show_rru(&self, mb_rru: Option<&RealRegUniverse>) -> String { - use std::fmt::Write; - - let mut s = String::new(); - write!(&mut s, "VCode_ShowWithRRU {{{{\n").unwrap(); - write!(&mut s, " Entry block: {}\n", self.entry).unwrap(); - - let mut state = Default::default(); - let mut safepoint_idx = 0; - for i in 0..self.num_blocks() { - let block = i as BlockIndex; - - write!(&mut s, "Block {}:\n", block).unwrap(); - if let Some(bb) = self.bindex_to_bb(block) { - write!(&mut s, " (original IR block: {})\n", bb).unwrap(); - } - for succ in self.succs(block) { - write!(&mut s, " (successor: Block {})\n", succ.get()).unwrap(); - } - let (start, end) = self.block_ranges[block as usize]; - write!(&mut s, " (instruction range: {} .. {})\n", start, end).unwrap(); - for inst in start..end { - if safepoint_idx < self.safepoint_insns.len() - && self.safepoint_insns[safepoint_idx] == inst - { - write!( - &mut s, - " (safepoint: slots {:?} with EmitState {:?})\n", - self.safepoint_slots[safepoint_idx], state, - ) - .unwrap(); - safepoint_idx += 1; - } - write!( - &mut s, - " Inst {}: {}\n", - inst, - self.insts[inst as usize].pretty_print(mb_rru, &mut state) - ) - .unwrap(); - } - } - - write!(&mut s, "}}}}\n").unwrap(); - - s - } -} - /// This structure tracks the large constants used in VCode that will be emitted separately by the /// [MachBuffer]. /// diff --git a/cranelift/codegen/src/prelude.isle b/cranelift/codegen/src/prelude.isle index 044e24930f..9922130762 100644 --- a/cranelift/codegen/src/prelude.isle +++ b/cranelift/codegen/src/prelude.isle @@ -397,9 +397,6 @@ (decl emit (MInst) Unit) (extern constructor emit emit) -(decl emit_safepoint (MInst) Unit) -(extern constructor emit_safepoint emit_safepoint) - ;;;; Helpers for Side-Effectful Instructions Without Results ;;;;;;;;;;;;;;;;;;; (type SideEffectNoResult (enum (Inst (inst MInst)))) @@ -411,12 +408,6 @@ (let ((_ Unit (emit inst))) (output_none))) -;; Similarly, but emit the side-effectful instruction as a safepoint. -(decl safepoint (SideEffectNoResult) InstOutput) -(rule (safepoint (SideEffectNoResult.Inst inst)) - (let ((_ Unit (emit_safepoint inst))) - (output_none))) - ;;;; Helpers for Working with Flags ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;; Newtype wrapper around `MInst` for instructions that are used for their diff --git a/cranelift/codegen/src/result.rs b/cranelift/codegen/src/result.rs index 3178cd5ba9..d8b7320c54 100644 --- a/cranelift/codegen/src/result.rs +++ b/cranelift/codegen/src/result.rs @@ -1,12 +1,14 @@ //! Result and error types representing the outcome of compiling a function. +use regalloc2::checker::CheckerErrors; + use crate::verifier::VerifierErrors; use std::string::String; /// A compilation error. /// /// When Cranelift fails to compile a function, it will return one of these error codes. 
-#[derive(Debug, PartialEq, Eq)] +#[derive(Debug)] pub enum CodegenError { /// A list of IR verifier errors. /// @@ -36,6 +38,9 @@ pub enum CodegenError { /// A failure to map Cranelift register representation to a DWARF register representation. #[cfg(feature = "unwind")] RegisterMappingError(crate::isa::unwind::systemv::RegisterMappingError), + + /// Register allocator internal error discovered by the symbolic checker. + Regalloc(CheckerErrors), } /// A convenient alias for a `Result` that uses `CodegenError` as the error type. @@ -52,6 +57,7 @@ impl std::error::Error for CodegenError { | CodegenError::Unsupported { .. } => None, #[cfg(feature = "unwind")] CodegenError::RegisterMappingError { .. } => None, + CodegenError::Regalloc(..) => None, } } } @@ -65,6 +71,7 @@ impl std::fmt::Display for CodegenError { CodegenError::Unsupported(feature) => write!(f, "Unsupported feature: {}", feature), #[cfg(feature = "unwind")] CodegenError::RegisterMappingError(_0) => write!(f, "Register mapping error"), + CodegenError::Regalloc(errors) => write!(f, "Regalloc validation errors: {:?}", errors), } } } diff --git a/cranelift/codegen/src/settings.rs b/cranelift/codegen/src/settings.rs index 1842e543fe..5a9f27c746 100644 --- a/cranelift/codegen/src/settings.rs +++ b/cranelift/codegen/src/settings.rs @@ -510,7 +510,6 @@ mod tests { assert_eq!( f.to_string(), r#"[shared] -regalloc = "backtracking" opt_level = "none" tls_model = "none" libcall_call_conv = "isa_default" diff --git a/cranelift/codegen/src/value_label.rs b/cranelift/codegen/src/value_label.rs index 459fa62d72..ab5276909a 100644 --- a/cranelift/codegen/src/value_label.rs +++ b/cranelift/codegen/src/value_label.rs @@ -1,10 +1,10 @@ use crate::ir::{SourceLoc, ValueLabel}; +use crate::machinst::Reg; use crate::HashMap; use alloc::vec::Vec; use core::cmp::Ordering; use core::convert::From; use core::ops::Deref; -use regalloc::Reg; #[cfg(feature = "enable-serde")] use serde::{Deserialize, Serialize}; diff --git a/cranelift/filetests/filetests/isa/aarch64/amodes.clif b/cranelift/filetests/filetests/isa/aarch64/amodes.clif index bc6636429a..c3254cc946 100644 --- a/cranelift/filetests/filetests/isa/aarch64/amodes.clif +++ b/cranelift/filetests/filetests/isa/aarch64/amodes.clif @@ -10,14 +10,9 @@ block0(v0: i64, v1: i32): return v4 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 2) -; Inst 0: ldr w0, [x0, w1, SXTW] -; Inst 1: ret -; }} +; block0: +; ldr w0, [x0, w1, SXTW] +; ret function %f6(i64, i32) -> i32 { block0(v0: i64, v1: i32): @@ -27,14 +22,9 @@ block0(v0: i64, v1: i32): return v4 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 2) -; Inst 0: ldr w0, [x0, w1, SXTW] -; Inst 1: ret -; }} +; block0: +; ldr w0, [x0, w1, SXTW] +; ret function %f7(i32, i32) -> i32 { block0(v0: i32, v1: i32): @@ -45,15 +35,10 @@ block0(v0: i32, v1: i32): return v5 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 3) -; Inst 0: mov w0, w0 -; Inst 1: ldr w0, [x0, w1, UXTW] -; Inst 2: ret -; }} +; block0: +; mov w6, w0 +; ldr w0, [x6, w1, UXTW] +; ret function %f8(i64, i32) -> i32 { block0(v0: i64, v1: i32): @@ -66,17 +51,12 @@ block0(v0: i64, v1: i32): return v7 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 
5) -; Inst 0: add x2, x0, #68 -; Inst 1: add x0, x2, x0 -; Inst 2: add x0, x0, x1, SXTW -; Inst 3: ldr w0, [x0, w1, SXTW] -; Inst 4: ret -; }} +; block0: +; add x6, x0, #68 +; add x6, x6, x0 +; add x6, x6, x1, SXTW +; ldr w0, [x6, w1, SXTW] +; ret function %f9(i64, i64, i64) -> i32 { block0(v0: i64, v1: i64, v2: i64): @@ -88,16 +68,11 @@ block0(v0: i64, v1: i64, v2: i64): return v7 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 4) -; Inst 0: add x0, x0, x2 -; Inst 1: add x0, x0, x1 -; Inst 2: ldur w0, [x0, #48] -; Inst 3: ret -; }} +; block0: +; add x0, x0, x2 +; add x0, x0, x1 +; ldr w0, [x0, #48] +; ret function %f10(i64, i64, i64) -> i32 { block0(v0: i64, v1: i64, v2: i64): @@ -109,17 +84,12 @@ block0(v0: i64, v1: i64, v2: i64): return v7 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 5) -; Inst 0: movz x3, #4100 -; Inst 1: add x1, x3, x1 -; Inst 2: add x1, x1, x2 -; Inst 3: ldr w0, [x1, x0] -; Inst 4: ret -; }} +; block0: +; movz x8, #4100 +; add x8, x8, x1 +; add x8, x8, x2 +; ldr w0, [x8, x0] +; ret function %f10() -> i32 { block0: @@ -128,15 +98,10 @@ block0: return v2 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 3) -; Inst 0: movz x0, #1234 -; Inst 1: ldr w0, [x0] -; Inst 2: ret -; }} +; block0: +; movz x2, #1234 +; ldr w0, [x2] +; ret function %f11(i64) -> i32 { block0(v0: i64): @@ -146,15 +111,10 @@ block0(v0: i64): return v3 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 3) -; Inst 0: add x0, x0, #8388608 -; Inst 1: ldr w0, [x0] -; Inst 2: ret -; }} +; block0: +; add x4, x0, #8388608 +; ldr w0, [x4] +; ret function %f12(i64) -> i32 { block0(v0: i64): @@ -164,15 +124,10 @@ block0(v0: i64): return v3 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 3) -; Inst 0: sub x0, x0, #4 -; Inst 1: ldr w0, [x0] -; Inst 2: ret -; }} +; block0: +; sub x4, x0, #4 +; ldr w0, [x4] +; ret function %f13(i64) -> i32 { block0(v0: i64): @@ -182,17 +137,12 @@ block0(v0: i64): return v3 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 5) -; Inst 0: movz w1, #51712 -; Inst 1: movk w1, #15258, LSL #16 -; Inst 2: add x0, x1, x0 -; Inst 3: ldr w0, [x0] -; Inst 4: ret -; }} +; block0: +; movz w4, #51712 +; movk w4, #15258, LSL #16 +; add x4, x4, x0 +; ldr w0, [x4] +; ret function %f14(i32) -> i32 { block0(v0: i32): @@ -201,15 +151,10 @@ block0(v0: i32): return v2 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 3) -; Inst 0: sxtw x0, w0 -; Inst 1: ldr w0, [x0] -; Inst 2: ret -; }} +; block0: +; sxtw x4, w0 +; ldr w0, [x4] +; ret function %f15(i32, i32) -> i32 { block0(v0: i32, v1: i32): @@ -220,15 +165,10 @@ block0(v0: i32, v1: i32): return v5 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 3) -; Inst 0: sxtw x0, w0 -; Inst 1: ldr w0, [x0, w1, SXTW] -; Inst 2: ret -; }} +; block0: +; sxtw x6, w0 +; ldr w0, [x6, w1, SXTW] +; ret function %f18(i64, i64, i64) -> i32 { block0(v0: i64, v1: i64, v2: i64): @@ -238,15 +178,10 @@ block0(v0: i64, v1: i64, v2: i64): return v5 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 
3) -; Inst 0: movn w0, #4097 -; Inst 1: ldrsh x0, [x0] -; Inst 2: ret -; }} +; block0: +; movn w8, #4097 +; ldrsh x0, [x8] +; ret function %f19(i64, i64, i64) -> i32 { block0(v0: i64, v1: i64, v2: i64): @@ -256,15 +191,10 @@ block0(v0: i64, v1: i64, v2: i64): return v5 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 3) -; Inst 0: movz x0, #4098 -; Inst 1: ldrsh x0, [x0] -; Inst 2: ret -; }} +; block0: +; movz x8, #4098 +; ldrsh x0, [x8] +; ret function %f20(i64, i64, i64) -> i32 { block0(v0: i64, v1: i64, v2: i64): @@ -274,16 +204,11 @@ block0(v0: i64, v1: i64, v2: i64): return v5 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 4) -; Inst 0: movn w0, #4097 -; Inst 1: sxtw x0, w0 -; Inst 2: ldrsh x0, [x0] -; Inst 3: ret -; }} +; block0: +; movn w8, #4097 +; sxtw x10, w8 +; ldrsh x0, [x10] +; ret function %f21(i64, i64, i64) -> i32 { block0(v0: i64, v1: i64, v2: i64): @@ -293,16 +218,11 @@ block0(v0: i64, v1: i64, v2: i64): return v5 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 4) -; Inst 0: movz x0, #4098 -; Inst 1: sxtw x0, w0 -; Inst 2: ldrsh x0, [x0] -; Inst 3: ret -; }} +; block0: +; movz x8, #4098 +; sxtw x10, w8 +; ldrsh x0, [x10] +; ret function %i128(i64) -> i128 { block0(v0: i64): @@ -311,17 +231,13 @@ block0(v0: i64): return v1 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 5) -; Inst 0: mov x1, x0 -; Inst 1: ldp x2, x1, [x1] -; Inst 2: stp x2, x1, [x0] -; Inst 3: mov x0, x2 -; Inst 4: ret -; }} +; block0: +; mov x8, x0 +; ldp x3, x1, [x8] +; mov x11, x3 +; stp x11, x1, [x0] +; mov x0, x3 +; ret function %i128_imm_offset(i64) -> i128 { block0(v0: i64): @@ -330,17 +246,13 @@ block0(v0: i64): return v1 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 5) -; Inst 0: mov x1, x0 -; Inst 1: ldp x2, x1, [x1, #16] -; Inst 2: stp x2, x1, [x0, #16] -; Inst 3: mov x0, x2 -; Inst 4: ret -; }} +; block0: +; mov x8, x0 +; ldp x3, x1, [x8, #16] +; mov x11, x3 +; stp x11, x1, [x0, #16] +; mov x0, x3 +; ret function %i128_imm_offset_large(i64) -> i128 { block0(v0: i64): @@ -349,17 +261,13 @@ block0(v0: i64): return v1 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 5) -; Inst 0: mov x1, x0 -; Inst 1: ldp x2, x1, [x1, #504] -; Inst 2: stp x2, x1, [x0, #504] -; Inst 3: mov x0, x2 -; Inst 4: ret -; }} +; block0: +; mov x8, x0 +; ldp x3, x1, [x8, #504] +; mov x11, x3 +; stp x11, x1, [x0, #504] +; mov x0, x3 +; ret function %i128_imm_offset_negative_large(i64) -> i128 { block0(v0: i64): @@ -368,17 +276,13 @@ block0(v0: i64): return v1 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 5) -; Inst 0: mov x1, x0 -; Inst 1: ldp x2, x1, [x1, #-512] -; Inst 2: stp x2, x1, [x0, #-512] -; Inst 3: mov x0, x2 -; Inst 4: ret -; }} +; block0: +; mov x8, x0 +; ldp x3, x1, [x8, #-512] +; mov x11, x3 +; stp x11, x1, [x0, #-512] +; mov x0, x3 +; ret function %i128_add_offset(i64) -> i128 { block0(v0: i64): @@ -388,17 +292,13 @@ block0(v0: i64): return v2 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 
5) -; Inst 0: mov x1, x0 -; Inst 1: ldp x2, x1, [x1, #32] -; Inst 2: stp x2, x1, [x0, #32] -; Inst 3: mov x0, x2 -; Inst 4: ret -; }} +; block0: +; mov x8, x0 +; ldp x3, x1, [x8, #32] +; mov x11, x3 +; stp x11, x1, [x0, #32] +; mov x0, x3 +; ret function %i128_32bit_sextend_simple(i32) -> i128 { block0(v0: i32): @@ -408,18 +308,13 @@ block0(v0: i32): return v2 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 6) -; Inst 0: sxtw x1, w0 -; Inst 1: ldp x2, x1, [x1] -; Inst 2: sxtw x0, w0 -; Inst 3: stp x2, x1, [x0] -; Inst 4: mov x0, x2 -; Inst 5: ret -; }} +; block0: +; sxtw x8, w0 +; ldp x4, x1, [x8] +; sxtw x9, w0 +; mov x0, x4 +; stp x0, x1, [x9] +; ret function %i128_32bit_sextend(i64, i32) -> i128 { block0(v0: i64, v1: i32): @@ -431,18 +326,14 @@ block0(v0: i64, v1: i32): return v5 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 8) -; Inst 0: mov x2, x0 -; Inst 1: add x2, x2, x1, SXTW -; Inst 2: ldp x3, x2, [x2, #24] -; Inst 3: add x0, x0, x1, SXTW -; Inst 4: stp x3, x2, [x0, #24] -; Inst 5: mov x0, x3 -; Inst 6: mov x1, x2 -; Inst 7: ret -; }} +; block0: +; mov x10, x0 +; add x10, x10, x1, SXTW +; ldp x6, x7, [x10, #24] +; add x0, x0, x1, SXTW +; mov x15, x6 +; mov x1, x7 +; stp x15, x1, [x0, #24] +; mov x0, x6 +; ret diff --git a/cranelift/filetests/filetests/isa/aarch64/arithmetic.clif b/cranelift/filetests/filetests/isa/aarch64/arithmetic.clif index 939f0e6390..1491b28604 100644 --- a/cranelift/filetests/filetests/isa/aarch64/arithmetic.clif +++ b/cranelift/filetests/filetests/isa/aarch64/arithmetic.clif @@ -8,14 +8,9 @@ block0(v0: i64, v1: i64): return v2 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 2) -; Inst 0: add x0, x0, x1 -; Inst 1: ret -; }} +; block0: +; add x0, x0, x1 +; ret function %f2(i64, i64) -> i64 { block0(v0: i64, v1: i64): @@ -23,14 +18,9 @@ block0(v0: i64, v1: i64): return v2 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 2) -; Inst 0: sub x0, x0, x1 -; Inst 1: ret -; }} +; block0: +; sub x0, x0, x1 +; ret function %f3(i64, i64) -> i64 { block0(v0: i64, v1: i64): @@ -38,14 +28,9 @@ block0(v0: i64, v1: i64): return v2 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 2) -; Inst 0: madd x0, x0, x1, xzr -; Inst 1: ret -; }} +; block0: +; madd x0, x0, x1, xzr +; ret function %f4(i64, i64) -> i64 { block0(v0: i64, v1: i64): @@ -53,14 +38,9 @@ block0(v0: i64, v1: i64): return v2 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 2) -; Inst 0: umulh x0, x0, x1 -; Inst 1: ret -; }} +; block0: +; umulh x0, x0, x1 +; ret function %f5(i64, i64) -> i64 { block0(v0: i64, v1: i64): @@ -68,14 +48,9 @@ block0(v0: i64, v1: i64): return v2 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 2) -; Inst 0: smulh x0, x0, x1 -; Inst 1: ret -; }} +; block0: +; smulh x0, x0, x1 +; ret function %f6(i64, i64) -> i64 { block0(v0: i64, v1: i64): @@ -83,18 +58,13 @@ block0(v0: i64, v1: i64): return v2 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 
6) -; Inst 0: cbnz x1, 8 ; udf -; Inst 1: adds xzr, x1, #1 -; Inst 2: ccmp x0, #1, #nzcv, eq -; Inst 3: b.vc 8 ; udf -; Inst 4: sdiv x0, x0, x1 -; Inst 5: ret -; }} +; block0: +; cbnz x1, 8 ; udf +; adds xzr, x1, #1 +; ccmp x0, #1, #nzcv, eq +; b.vc 8 ; udf +; sdiv x0, x0, x1 +; ret function %f7(i64) -> i64 { block0(v0: i64): @@ -103,15 +73,10 @@ block0(v0: i64): return v2 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 3) -; Inst 0: orr x1, xzr, #2 -; Inst 1: sdiv x0, x0, x1 -; Inst 2: ret -; }} +; block0: +; orr x3, xzr, #2 +; sdiv x0, x0, x3 +; ret function %f8(i64, i64) -> i64 { block0(v0: i64, v1: i64): @@ -119,15 +84,10 @@ block0(v0: i64, v1: i64): return v2 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 3) -; Inst 0: cbnz x1, 8 ; udf -; Inst 1: udiv x0, x0, x1 -; Inst 2: ret -; }} +; block0: +; cbnz x1, 8 ; udf +; udiv x0, x0, x1 +; ret function %f9(i64) -> i64 { block0(v0: i64): @@ -136,15 +96,10 @@ block0(v0: i64): return v2 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 3) -; Inst 0: orr x1, xzr, #2 -; Inst 1: udiv x0, x0, x1 -; Inst 2: ret -; }} +; block0: +; orr x3, xzr, #2 +; udiv x0, x0, x3 +; ret function %f10(i64, i64) -> i64 { block0(v0: i64, v1: i64): @@ -152,16 +107,11 @@ block0(v0: i64, v1: i64): return v2 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 4) -; Inst 0: cbnz x1, 8 ; udf -; Inst 1: sdiv x2, x0, x1 -; Inst 2: msub x0, x2, x1, x0 -; Inst 3: ret -; }} +; block0: +; cbnz x1, 8 ; udf +; sdiv x6, x0, x1 +; msub x0, x6, x1, x0 +; ret function %f11(i64, i64) -> i64 { block0(v0: i64, v1: i64): @@ -169,16 +119,11 @@ block0(v0: i64, v1: i64): return v2 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 4) -; Inst 0: cbnz x1, 8 ; udf -; Inst 1: udiv x2, x0, x1 -; Inst 2: msub x0, x2, x1, x0 -; Inst 3: ret -; }} +; block0: +; cbnz x1, 8 ; udf +; udiv x6, x0, x1 +; msub x0, x6, x1, x0 +; ret function %f12(i32, i32) -> i32 { block0(v0: i32, v1: i32): @@ -186,20 +131,15 @@ block0(v0: i32, v1: i32): return v2 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 8) -; Inst 0: sxtw x0, w0 -; Inst 1: sxtw x1, w1 -; Inst 2: cbnz x1, 8 ; udf -; Inst 3: adds wzr, w1, #1 -; Inst 4: ccmp w0, #1, #nzcv, eq -; Inst 5: b.vc 8 ; udf -; Inst 6: sdiv x0, x0, x1 -; Inst 7: ret -; }} +; block0: +; sxtw x5, w0 +; sxtw x7, w1 +; cbnz x7, 8 ; udf +; adds wzr, w7, #1 +; ccmp w5, #1, #nzcv, eq +; b.vc 8 ; udf +; sdiv x0, x5, x7 +; ret function %f13(i32) -> i32 { block0(v0: i32): @@ -208,16 +148,11 @@ block0(v0: i32): return v2 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 4) -; Inst 0: sxtw x0, w0 -; Inst 1: orr x1, xzr, #2 -; Inst 2: sdiv x0, x0, x1 -; Inst 3: ret -; }} +; block0: +; sxtw x3, w0 +; orr x5, xzr, #2 +; sdiv x0, x3, x5 +; ret function %f14(i32, i32) -> i32 { block0(v0: i32, v1: i32): @@ -225,17 +160,12 @@ block0(v0: i32, v1: i32): return v2 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 
5) -; Inst 0: mov w0, w0 -; Inst 1: mov w1, w1 -; Inst 2: cbnz x1, 8 ; udf -; Inst 3: udiv x0, x0, x1 -; Inst 4: ret -; }} +; block0: +; mov w5, w0 +; mov w7, w1 +; cbnz x7, 8 ; udf +; udiv x0, x5, x7 +; ret function %f15(i32) -> i32 { block0(v0: i32): @@ -244,16 +174,11 @@ block0(v0: i32): return v2 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 4) -; Inst 0: mov w0, w0 -; Inst 1: orr x1, xzr, #2 -; Inst 2: udiv x0, x0, x1 -; Inst 3: ret -; }} +; block0: +; mov w3, w0 +; orr x5, xzr, #2 +; udiv x0, x3, x5 +; ret function %f16(i32, i32) -> i32 { block0(v0: i32, v1: i32): @@ -261,18 +186,13 @@ block0(v0: i32, v1: i32): return v2 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 6) -; Inst 0: sxtw x0, w0 -; Inst 1: sxtw x1, w1 -; Inst 2: cbnz x1, 8 ; udf -; Inst 3: sdiv x2, x0, x1 -; Inst 4: msub x0, x2, x1, x0 -; Inst 5: ret -; }} +; block0: +; sxtw x5, w0 +; sxtw x7, w1 +; cbnz x7, 8 ; udf +; sdiv x10, x5, x7 +; msub x0, x10, x7, x5 +; ret function %f17(i32, i32) -> i32 { block0(v0: i32, v1: i32): @@ -280,18 +200,13 @@ block0(v0: i32, v1: i32): return v2 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 6) -; Inst 0: mov w0, w0 -; Inst 1: mov w1, w1 -; Inst 2: cbnz x1, 8 ; udf -; Inst 3: udiv x2, x0, x1 -; Inst 4: msub x0, x2, x1, x0 -; Inst 5: ret -; }} +; block0: +; mov w5, w0 +; mov w7, w1 +; cbnz x7, 8 ; udf +; udiv x10, x5, x7 +; msub x0, x10, x7, x5 +; ret function %f18(i64, i64) -> i64 { block0(v0: i64, v1: i64): @@ -299,14 +214,9 @@ block0(v0: i64, v1: i64): return v2 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 2) -; Inst 0: and x0, x0, x1 -; Inst 1: ret -; }} +; block0: +; and x0, x0, x1 +; ret function %f19(i64, i64) -> i64 { block0(v0: i64, v1: i64): @@ -314,14 +224,9 @@ block0(v0: i64, v1: i64): return v2 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 2) -; Inst 0: orr x0, x0, x1 -; Inst 1: ret -; }} +; block0: +; orr x0, x0, x1 +; ret function %f20(i64, i64) -> i64 { block0(v0: i64, v1: i64): @@ -329,14 +234,9 @@ block0(v0: i64, v1: i64): return v2 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 2) -; Inst 0: eor x0, x0, x1 -; Inst 1: ret -; }} +; block0: +; eor x0, x0, x1 +; ret function %f21(i64, i64) -> i64 { block0(v0: i64, v1: i64): @@ -344,14 +244,9 @@ block0(v0: i64, v1: i64): return v2 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 2) -; Inst 0: bic x0, x0, x1 -; Inst 1: ret -; }} +; block0: +; bic x0, x0, x1 +; ret function %f22(i64, i64) -> i64 { block0(v0: i64, v1: i64): @@ -359,14 +254,9 @@ block0(v0: i64, v1: i64): return v2 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 2) -; Inst 0: orn x0, x0, x1 -; Inst 1: ret -; }} +; block0: +; orn x0, x0, x1 +; ret function %f23(i64, i64) -> i64 { block0(v0: i64, v1: i64): @@ -374,14 +264,9 @@ block0(v0: i64, v1: i64): return v2 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 
2) -; Inst 0: eon x0, x0, x1 -; Inst 1: ret -; }} +; block0: +; eon x0, x0, x1 +; ret function %f24(i64, i64) -> i64 { block0(v0: i64, v1: i64): @@ -389,14 +274,9 @@ block0(v0: i64, v1: i64): return v2 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 2) -; Inst 0: orn x0, xzr, x0 -; Inst 1: ret -; }} +; block0: +; orn x0, xzr, x0 +; ret function %f25(i32, i32) -> i32 { block0(v0: i32, v1: i32): @@ -406,14 +286,9 @@ block0(v0: i32, v1: i32): return v4 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 2) -; Inst 0: sub w0, w1, w0, LSL 21 -; Inst 1: ret -; }} +; block0: +; sub w0, w1, w0, LSL 21 +; ret function %f26(i32) -> i32 { block0(v0: i32): @@ -422,14 +297,9 @@ block0(v0: i32): return v2 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 2) -; Inst 0: sub w0, w0, #1 -; Inst 1: ret -; }} +; block0: +; sub w0, w0, #1 +; ret function %f27(i32) -> i32 { block0(v0: i32): @@ -438,14 +308,9 @@ block0(v0: i32): return v2 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 2) -; Inst 0: add w0, w0, #1 -; Inst 1: ret -; }} +; block0: +; add w0, w0, #1 +; ret function %f28(i64) -> i64 { block0(v0: i64): @@ -454,14 +319,9 @@ block0(v0: i64): return v2 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 2) -; Inst 0: add x0, x0, #1 -; Inst 1: ret -; }} +; block0: +; add x0, x0, #1 +; ret function %f29(i64) -> i64 { block0(v0: i64): @@ -470,15 +330,10 @@ block0(v0: i64): return v2 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 3) -; Inst 0: movz x0, #1 -; Inst 1: sub x0, xzr, x0 -; Inst 2: ret -; }} +; block0: +; movz x3, #1 +; sub x0, xzr, x3 +; ret function %f30(i8x16) -> i8x16 { block0(v0: i8x16): @@ -487,17 +342,12 @@ block0(v0: i8x16): return v2 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 5) -; Inst 0: movz x0, #1 -; Inst 1: sub w0, wzr, w0 -; Inst 2: dup v1.16b, w0 -; Inst 3: ushl v0.16b, v0.16b, v1.16b -; Inst 4: ret -; }} +; block0: +; movz x3, #1 +; sub w5, wzr, w3 +; dup v7.16b, w5 +; ushl v0.16b, v0.16b, v7.16b +; ret function %add_i128(i128, i128) -> i128 { block0(v0: i128, v1: i128): @@ -505,15 +355,10 @@ block0(v0: i128, v1: i128): return v2 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 3) -; Inst 0: adds x0, x0, x2 -; Inst 1: adc x1, x1, x3 -; Inst 2: ret -; }} +; block0: +; adds x0, x0, x2 +; adc x1, x1, x3 +; ret function %sub_i128(i128, i128) -> i128 { block0(v0: i128, v1: i128): @@ -521,15 +366,10 @@ block0(v0: i128, v1: i128): return v2 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 3) -; Inst 0: subs x0, x0, x2 -; Inst 1: sbc x1, x1, x3 -; Inst 2: ret -; }} +; block0: +; subs x0, x0, x2 +; sbc x1, x1, x3 +; ret function %mul_i128(i128, i128) -> i128 { block0(v0: i128, v1: i128): @@ -537,17 +377,12 @@ block0(v0: i128, v1: i128): return v2 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 
5) -; Inst 0: umulh x4, x0, x2 -; Inst 1: madd x3, x0, x3, x4 -; Inst 2: madd x1, x1, x2, x3 -; Inst 3: madd x0, x0, x2, xzr -; Inst 4: ret -; }} +; block0: +; umulh x10, x0, x2 +; madd x12, x0, x3, x10 +; madd x1, x1, x2, x12 +; madd x0, x0, x2, xzr +; ret function %add_mul_1(i32, i32, i32) -> i32 { block0(v0: i32, v1: i32, v2: i32): @@ -556,14 +391,9 @@ block0(v0: i32, v1: i32, v2: i32): return v4 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 2) -; Inst 0: madd w0, w1, w2, w0 -; Inst 1: ret -; }} +; block0: +; madd w0, w1, w2, w0 +; ret function %add_mul_2(i32, i32, i32) -> i32 { block0(v0: i32, v1: i32, v2: i32): @@ -572,14 +402,9 @@ block0(v0: i32, v1: i32, v2: i32): return v4 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 2) -; Inst 0: madd w0, w1, w2, w0 -; Inst 1: ret -; }} +; block0: +; madd w0, w1, w2, w0 +; ret function %srem_const (i64) -> i64 { block0(v0: i64): @@ -588,16 +413,11 @@ block0(v0: i64): return v2 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 4) -; Inst 0: orr x1, xzr, #2 -; Inst 1: sdiv x2, x0, x1 -; Inst 2: msub x0, x2, x1, x0 -; Inst 3: ret -; }} +; block0: +; orr x3, xzr, #2 +; sdiv x5, x0, x3 +; msub x0, x5, x3, x0 +; ret function %urem_const (i64) -> i64 { block0(v0: i64): @@ -606,16 +426,11 @@ block0(v0: i64): return v2 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 4) -; Inst 0: orr x1, xzr, #2 -; Inst 1: udiv x2, x0, x1 -; Inst 2: msub x0, x2, x1, x0 -; Inst 3: ret -; }} +; block0: +; orr x3, xzr, #2 +; udiv x5, x0, x3 +; msub x0, x5, x3, x0 +; ret function %sdiv_minus_one(i64) -> i64 { block0(v0: i64): @@ -624,16 +439,11 @@ block0(v0: i64): return v2 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 6) -; Inst 0: movn x1, #0 -; Inst 1: adds xzr, x1, #1 -; Inst 2: ccmp x0, #1, #nzcv, eq -; Inst 3: b.vc 8 ; udf -; Inst 4: sdiv x0, x0, x1 -; Inst 5: ret -; }} +; block0: +; movn x3, #0 +; adds xzr, x3, #1 +; ccmp x0, #1, #nzcv, eq +; b.vc 8 ; udf +; sdiv x0, x0, x3 +; ret diff --git a/cranelift/filetests/filetests/isa/aarch64/atomic-rmw-lse.clif b/cranelift/filetests/filetests/isa/aarch64/atomic-rmw-lse.clif index ca3c26fbcc..96cbe97cd9 100644 --- a/cranelift/filetests/filetests/isa/aarch64/atomic-rmw-lse.clif +++ b/cranelift/filetests/filetests/isa/aarch64/atomic-rmw-lse.clif @@ -7,14 +7,9 @@ block0(v0: i64, v1: i64): return } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 2) -; Inst 0: ldaddal x1, x0, [x0] -; Inst 1: ret -; }} +; block0: +; ldaddal x1, x4, [x0] +; ret function %atomic_rmw_add_i32(i64, i32) { block0(v0: i64, v1: i32): @@ -22,14 +17,9 @@ block0(v0: i64, v1: i32): return } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 2) -; Inst 0: ldaddal w1, w0, [x0] -; Inst 1: ret -; }} +; block0: +; ldaddal w1, w4, [x0] +; ret function %atomic_rmw_add_i16(i64, i16) { block0(v0: i64, v1: i16): @@ -37,14 +27,9 @@ block0(v0: i64, v1: i16): return } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 
2) -; Inst 0: ldaddalh w1, w0, [x0] -; Inst 1: ret -; }} +; block0: +; ldaddalh w1, w4, [x0] +; ret function %atomic_rmw_add_i8(i64, i8) { block0(v0: i64, v1: i8): @@ -52,14 +37,9 @@ block0(v0: i64, v1: i8): return } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 2) -; Inst 0: ldaddalb w1, w0, [x0] -; Inst 1: ret -; }} +; block0: +; ldaddalb w1, w4, [x0] +; ret function %atomic_rmw_and_i64(i64, i64) { block0(v0: i64, v1: i64): @@ -67,14 +47,9 @@ block0(v0: i64, v1: i64): return } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 2) -; Inst 0: ldclral x1, x0, [x0] -; Inst 1: ret -; }} +; block0: +; ldclral x1, x4, [x0] +; ret function %atomic_rmw_and_i32(i64, i32) { block0(v0: i64, v1: i32): @@ -82,14 +57,9 @@ block0(v0: i64, v1: i32): return } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 2) -; Inst 0: ldclral w1, w0, [x0] -; Inst 1: ret -; }} +; block0: +; ldclral w1, w4, [x0] +; ret function %atomic_rmw_and_i16(i64, i16) { block0(v0: i64, v1: i16): @@ -97,14 +67,9 @@ block0(v0: i64, v1: i16): return } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 2) -; Inst 0: ldclralh w1, w0, [x0] -; Inst 1: ret -; }} +; block0: +; ldclralh w1, w4, [x0] +; ret function %atomic_rmw_and_i8(i64, i8) { block0(v0: i64, v1: i8): @@ -112,14 +77,9 @@ block0(v0: i64, v1: i8): return } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 2) -; Inst 0: ldclralb w1, w0, [x0] -; Inst 1: ret -; }} +; block0: +; ldclralb w1, w4, [x0] +; ret function %atomic_rmw_nand_i64(i64, i64) { block0(v0: i64, v1: i64): @@ -127,25 +87,21 @@ block0(v0: i64, v1: i64): return } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 13) -; Inst 0: stp fp, lr, [sp, #-16]! -; Inst 1: mov fp, sp -; Inst 2: str x28, [sp, #-16]! -; Inst 3: stp x26, x27, [sp, #-16]! -; Inst 4: stp x24, x25, [sp, #-16]! -; Inst 5: mov x25, x0 -; Inst 6: mov x26, x1 -; Inst 7: 1: ldaxr x27, [x25]; and x28, x27, x26; mvn x28, x28; stlxr w24, x28, [x25]; cbnz w24, 1b -; Inst 8: ldp x24, x25, [sp], #16 -; Inst 9: ldp x26, x27, [sp], #16 -; Inst 10: ldr x28, [sp], #16 -; Inst 11: ldp fp, lr, [sp], #16 -; Inst 12: ret -; }} +; stp fp, lr, [sp, #-16]! +; mov fp, sp +; str x28, [sp, #-16]! +; stp x26, x27, [sp, #-16]! +; stp x24, x25, [sp, #-16]! +; block0: +; mov x25, x0 +; mov x4, x1 +; mov x26, x4 +; 1: ldaxr x27, [x25]; and x28, x27, x26; mvn x28, x28; stlxr w24, x28, [x25]; cbnz w24, 1b +; ldp x24, x25, [sp], #16 +; ldp x26, x27, [sp], #16 +; ldr x28, [sp], #16 +; ldp fp, lr, [sp], #16 +; ret function %atomic_rmw_nand_i32(i64, i32) { block0(v0: i64, v1: i32): @@ -153,25 +109,21 @@ block0(v0: i64, v1: i32): return } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 13) -; Inst 0: stp fp, lr, [sp, #-16]! -; Inst 1: mov fp, sp -; Inst 2: str x28, [sp, #-16]! -; Inst 3: stp x26, x27, [sp, #-16]! -; Inst 4: stp x24, x25, [sp, #-16]! 
-; Inst 5: mov x25, x0 -; Inst 6: mov x26, x1 -; Inst 7: 1: ldaxr w27, [x25]; and w28, w27, w26; mvn w28, w28; stlxr w24, w28, [x25]; cbnz w24, 1b -; Inst 8: ldp x24, x25, [sp], #16 -; Inst 9: ldp x26, x27, [sp], #16 -; Inst 10: ldr x28, [sp], #16 -; Inst 11: ldp fp, lr, [sp], #16 -; Inst 12: ret -; }} +; stp fp, lr, [sp, #-16]! +; mov fp, sp +; str x28, [sp, #-16]! +; stp x26, x27, [sp, #-16]! +; stp x24, x25, [sp, #-16]! +; block0: +; mov x25, x0 +; mov x4, x1 +; mov x26, x4 +; 1: ldaxr w27, [x25]; and w28, w27, w26; mvn w28, w28; stlxr w24, w28, [x25]; cbnz w24, 1b +; ldp x24, x25, [sp], #16 +; ldp x26, x27, [sp], #16 +; ldr x28, [sp], #16 +; ldp fp, lr, [sp], #16 +; ret function %atomic_rmw_nand_i16(i64, i16) { block0(v0: i64, v1: i16): @@ -179,25 +131,21 @@ block0(v0: i64, v1: i16): return } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 13) -; Inst 0: stp fp, lr, [sp, #-16]! -; Inst 1: mov fp, sp -; Inst 2: str x28, [sp, #-16]! -; Inst 3: stp x26, x27, [sp, #-16]! -; Inst 4: stp x24, x25, [sp, #-16]! -; Inst 5: mov x25, x0 -; Inst 6: mov x26, x1 -; Inst 7: 1: ldaxrh w27, [x25]; and w28, w27, w26; mvn w28, w28; stlxrh w24, w28, [x25]; cbnz w24, 1b -; Inst 8: ldp x24, x25, [sp], #16 -; Inst 9: ldp x26, x27, [sp], #16 -; Inst 10: ldr x28, [sp], #16 -; Inst 11: ldp fp, lr, [sp], #16 -; Inst 12: ret -; }} +; stp fp, lr, [sp, #-16]! +; mov fp, sp +; str x28, [sp, #-16]! +; stp x26, x27, [sp, #-16]! +; stp x24, x25, [sp, #-16]! +; block0: +; mov x25, x0 +; mov x4, x1 +; mov x26, x4 +; 1: ldaxrh w27, [x25]; and w28, w27, w26; mvn w28, w28; stlxrh w24, w28, [x25]; cbnz w24, 1b +; ldp x24, x25, [sp], #16 +; ldp x26, x27, [sp], #16 +; ldr x28, [sp], #16 +; ldp fp, lr, [sp], #16 +; ret function %atomic_rmw_nand_i8(i64, i8) { block0(v0: i64, v1: i8): @@ -205,25 +153,21 @@ block0(v0: i64, v1: i8): return } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 13) -; Inst 0: stp fp, lr, [sp, #-16]! -; Inst 1: mov fp, sp -; Inst 2: str x28, [sp, #-16]! -; Inst 3: stp x26, x27, [sp, #-16]! -; Inst 4: stp x24, x25, [sp, #-16]! -; Inst 5: mov x25, x0 -; Inst 6: mov x26, x1 -; Inst 7: 1: ldaxrb w27, [x25]; and w28, w27, w26; mvn w28, w28; stlxrb w24, w28, [x25]; cbnz w24, 1b -; Inst 8: ldp x24, x25, [sp], #16 -; Inst 9: ldp x26, x27, [sp], #16 -; Inst 10: ldr x28, [sp], #16 -; Inst 11: ldp fp, lr, [sp], #16 -; Inst 12: ret -; }} +; stp fp, lr, [sp, #-16]! +; mov fp, sp +; str x28, [sp, #-16]! +; stp x26, x27, [sp, #-16]! +; stp x24, x25, [sp, #-16]! +; block0: +; mov x25, x0 +; mov x4, x1 +; mov x26, x4 +; 1: ldaxrb w27, [x25]; and w28, w27, w26; mvn w28, w28; stlxrb w24, w28, [x25]; cbnz w24, 1b +; ldp x24, x25, [sp], #16 +; ldp x26, x27, [sp], #16 +; ldr x28, [sp], #16 +; ldp fp, lr, [sp], #16 +; ret function %atomic_rmw_or_i64(i64, i64) { block0(v0: i64, v1: i64): @@ -231,14 +175,9 @@ block0(v0: i64, v1: i64): return } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 2) -; Inst 0: ldsetal x1, x0, [x0] -; Inst 1: ret -; }} +; block0: +; ldsetal x1, x4, [x0] +; ret function %atomic_rmw_or_i32(i64, i32) { block0(v0: i64, v1: i32): @@ -246,14 +185,9 @@ block0(v0: i64, v1: i32): return } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 
2) -; Inst 0: ldsetal w1, w0, [x0] -; Inst 1: ret -; }} +; block0: +; ldsetal w1, w4, [x0] +; ret function %atomic_rmw_or_i16(i64, i16) { block0(v0: i64, v1: i16): @@ -261,14 +195,9 @@ block0(v0: i64, v1: i16): return } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 2) -; Inst 0: ldsetalh w1, w0, [x0] -; Inst 1: ret -; }} +; block0: +; ldsetalh w1, w4, [x0] +; ret function %atomic_rmw_or_i8(i64, i8) { block0(v0: i64, v1: i8): @@ -276,14 +205,9 @@ block0(v0: i64, v1: i8): return } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 2) -; Inst 0: ldsetalb w1, w0, [x0] -; Inst 1: ret -; }} +; block0: +; ldsetalb w1, w4, [x0] +; ret function %atomic_rmw_xor_i64(i64, i64) { block0(v0: i64, v1: i64): @@ -291,14 +215,9 @@ block0(v0: i64, v1: i64): return } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 2) -; Inst 0: ldeoral x1, x0, [x0] -; Inst 1: ret -; }} +; block0: +; ldeoral x1, x4, [x0] +; ret function %atomic_rmw_xor_i32(i64, i32) { block0(v0: i64, v1: i32): @@ -306,14 +225,9 @@ block0(v0: i64, v1: i32): return } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 2) -; Inst 0: ldeoral w1, w0, [x0] -; Inst 1: ret -; }} +; block0: +; ldeoral w1, w4, [x0] +; ret function %atomic_rmw_xor_i16(i64, i16) { block0(v0: i64, v1: i16): @@ -321,14 +235,9 @@ block0(v0: i64, v1: i16): return } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 2) -; Inst 0: ldeoralh w1, w0, [x0] -; Inst 1: ret -; }} +; block0: +; ldeoralh w1, w4, [x0] +; ret function %atomic_rmw_xor_i8(i64, i8) { block0(v0: i64, v1: i8): @@ -336,14 +245,9 @@ block0(v0: i64, v1: i8): return } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 2) -; Inst 0: ldeoralb w1, w0, [x0] -; Inst 1: ret -; }} +; block0: +; ldeoralb w1, w4, [x0] +; ret function %atomic_rmw_smax_i64(i64, i64) { block0(v0: i64, v1: i64): @@ -351,14 +255,9 @@ block0(v0: i64, v1: i64): return } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 2) -; Inst 0: ldsmaxal x1, x0, [x0] -; Inst 1: ret -; }} +; block0: +; ldsmaxal x1, x4, [x0] +; ret function %atomic_rmw_smax_i32(i64, i32) { block0(v0: i64, v1: i32): @@ -366,14 +265,9 @@ block0(v0: i64, v1: i32): return } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 2) -; Inst 0: ldsmaxal w1, w0, [x0] -; Inst 1: ret -; }} +; block0: +; ldsmaxal w1, w4, [x0] +; ret function %atomic_rmw_smax_i16(i64, i16) { block0(v0: i64, v1: i16): @@ -381,14 +275,9 @@ block0(v0: i64, v1: i16): return } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 2) -; Inst 0: ldsmaxalh w1, w0, [x0] -; Inst 1: ret -; }} +; block0: +; ldsmaxalh w1, w4, [x0] +; ret function %atomic_rmw_smax_i8(i64, i8) { block0(v0: i64, v1: i8): @@ -396,14 +285,9 @@ block0(v0: i64, v1: i8): return } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 
2) -; Inst 0: ldsmaxalb w1, w0, [x0] -; Inst 1: ret -; }} +; block0: +; ldsmaxalb w1, w4, [x0] +; ret function %atomic_rmw_umax_i64(i64, i64) { block0(v0: i64, v1: i64): @@ -411,14 +295,9 @@ block0(v0: i64, v1: i64): return } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 2) -; Inst 0: ldumaxal x1, x0, [x0] -; Inst 1: ret -; }} +; block0: +; ldumaxal x1, x4, [x0] +; ret function %atomic_rmw_umax_i32(i64, i32) { block0(v0: i64, v1: i32): @@ -426,14 +305,9 @@ block0(v0: i64, v1: i32): return } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 2) -; Inst 0: ldumaxal w1, w0, [x0] -; Inst 1: ret -; }} +; block0: +; ldumaxal w1, w4, [x0] +; ret function %atomic_rmw_umax_i16(i64, i16) { block0(v0: i64, v1: i16): @@ -441,14 +315,9 @@ block0(v0: i64, v1: i16): return } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 2) -; Inst 0: ldumaxalh w1, w0, [x0] -; Inst 1: ret -; }} +; block0: +; ldumaxalh w1, w4, [x0] +; ret function %atomic_rmw_umax_i8(i64, i8) { block0(v0: i64, v1: i8): @@ -456,14 +325,9 @@ block0(v0: i64, v1: i8): return } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 2) -; Inst 0: ldumaxalb w1, w0, [x0] -; Inst 1: ret -; }} +; block0: +; ldumaxalb w1, w4, [x0] +; ret function %atomic_rmw_smin_i64(i64, i64) { block0(v0: i64, v1: i64): @@ -471,14 +335,9 @@ block0(v0: i64, v1: i64): return } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 2) -; Inst 0: ldsminal x1, x0, [x0] -; Inst 1: ret -; }} +; block0: +; ldsminal x1, x4, [x0] +; ret function %atomic_rmw_smin_i32(i64, i32) { block0(v0: i64, v1: i32): @@ -486,14 +345,9 @@ block0(v0: i64, v1: i32): return } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 2) -; Inst 0: ldsminal w1, w0, [x0] -; Inst 1: ret -; }} +; block0: +; ldsminal w1, w4, [x0] +; ret function %atomic_rmw_smin_i16(i64, i16) { block0(v0: i64, v1: i16): @@ -501,14 +355,9 @@ block0(v0: i64, v1: i16): return } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 2) -; Inst 0: ldsminalh w1, w0, [x0] -; Inst 1: ret -; }} +; block0: +; ldsminalh w1, w4, [x0] +; ret function %atomic_rmw_smin_i8(i64, i8) { block0(v0: i64, v1: i8): @@ -516,14 +365,9 @@ block0(v0: i64, v1: i8): return } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 2) -; Inst 0: ldsminalb w1, w0, [x0] -; Inst 1: ret -; }} +; block0: +; ldsminalb w1, w4, [x0] +; ret function %atomic_rmw_umin_i64(i64, i64) { block0(v0: i64, v1: i64): @@ -531,14 +375,9 @@ block0(v0: i64, v1: i64): return } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 2) -; Inst 0: lduminal x1, x0, [x0] -; Inst 1: ret -; }} +; block0: +; lduminal x1, x4, [x0] +; ret function %atomic_rmw_umin_i32(i64, i32) { block0(v0: i64, v1: i32): @@ -546,14 +385,9 @@ block0(v0: i64, v1: i32): return } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 
2) -; Inst 0: lduminal w1, w0, [x0] -; Inst 1: ret -; }} +; block0: +; lduminal w1, w4, [x0] +; ret function %atomic_rmw_umin_i16(i64, i16) { block0(v0: i64, v1: i16): @@ -561,14 +395,9 @@ block0(v0: i64, v1: i16): return } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 2) -; Inst 0: lduminalh w1, w0, [x0] -; Inst 1: ret -; }} +; block0: +; lduminalh w1, w4, [x0] +; ret function %atomic_rmw_umin_i8(i64, i8) { block0(v0: i64, v1: i8): @@ -576,12 +405,7 @@ block0(v0: i64, v1: i8): return } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 2) -; Inst 0: lduminalb w1, w0, [x0] -; Inst 1: ret -; }} +; block0: +; lduminalb w1, w4, [x0] +; ret diff --git a/cranelift/filetests/filetests/isa/aarch64/atomic-rmw.clif b/cranelift/filetests/filetests/isa/aarch64/atomic-rmw.clif index b793cd27ae..1229c08560 100644 --- a/cranelift/filetests/filetests/isa/aarch64/atomic-rmw.clif +++ b/cranelift/filetests/filetests/isa/aarch64/atomic-rmw.clif @@ -7,25 +7,21 @@ block0(v0: i64, v1: i64): return } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 13) -; Inst 0: stp fp, lr, [sp, #-16]! -; Inst 1: mov fp, sp -; Inst 2: str x28, [sp, #-16]! -; Inst 3: stp x26, x27, [sp, #-16]! -; Inst 4: stp x24, x25, [sp, #-16]! -; Inst 5: mov x25, x0 -; Inst 6: mov x26, x1 -; Inst 7: 1: ldaxr x27, [x25]; add x28, x27, x26; stlxr w24, x28, [x25]; cbnz w24, 1b -; Inst 8: ldp x24, x25, [sp], #16 -; Inst 9: ldp x26, x27, [sp], #16 -; Inst 10: ldr x28, [sp], #16 -; Inst 11: ldp fp, lr, [sp], #16 -; Inst 12: ret -; }} +; stp fp, lr, [sp, #-16]! +; mov fp, sp +; str x28, [sp, #-16]! +; stp x26, x27, [sp, #-16]! +; stp x24, x25, [sp, #-16]! +; block0: +; mov x25, x0 +; mov x4, x1 +; mov x26, x4 +; 1: ldaxr x27, [x25]; add x28, x27, x26; stlxr w24, x28, [x25]; cbnz w24, 1b +; ldp x24, x25, [sp], #16 +; ldp x26, x27, [sp], #16 +; ldr x28, [sp], #16 +; ldp fp, lr, [sp], #16 +; ret function %atomic_rmw_add_i32(i64, i32) { block0(v0: i64, v1: i32): @@ -33,25 +29,21 @@ block0(v0: i64, v1: i32): return } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 13) -; Inst 0: stp fp, lr, [sp, #-16]! -; Inst 1: mov fp, sp -; Inst 2: str x28, [sp, #-16]! -; Inst 3: stp x26, x27, [sp, #-16]! -; Inst 4: stp x24, x25, [sp, #-16]! -; Inst 5: mov x25, x0 -; Inst 6: mov x26, x1 -; Inst 7: 1: ldaxr w27, [x25]; add w28, w27, w26; stlxr w24, w28, [x25]; cbnz w24, 1b -; Inst 8: ldp x24, x25, [sp], #16 -; Inst 9: ldp x26, x27, [sp], #16 -; Inst 10: ldr x28, [sp], #16 -; Inst 11: ldp fp, lr, [sp], #16 -; Inst 12: ret -; }} +; stp fp, lr, [sp, #-16]! +; mov fp, sp +; str x28, [sp, #-16]! +; stp x26, x27, [sp, #-16]! +; stp x24, x25, [sp, #-16]! +; block0: +; mov x25, x0 +; mov x4, x1 +; mov x26, x4 +; 1: ldaxr w27, [x25]; add w28, w27, w26; stlxr w24, w28, [x25]; cbnz w24, 1b +; ldp x24, x25, [sp], #16 +; ldp x26, x27, [sp], #16 +; ldr x28, [sp], #16 +; ldp fp, lr, [sp], #16 +; ret function %atomic_rmw_add_i16(i64, i16) { block0(v0: i64, v1: i16): @@ -59,25 +51,21 @@ block0(v0: i64, v1: i16): return } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 13) -; Inst 0: stp fp, lr, [sp, #-16]! -; Inst 1: mov fp, sp -; Inst 2: str x28, [sp, #-16]! -; Inst 3: stp x26, x27, [sp, #-16]! -; Inst 4: stp x24, x25, [sp, #-16]! 
-; Inst 5: mov x25, x0 -; Inst 6: mov x26, x1 -; Inst 7: 1: ldaxrh w27, [x25]; add w28, w27, w26; stlxrh w24, w28, [x25]; cbnz w24, 1b -; Inst 8: ldp x24, x25, [sp], #16 -; Inst 9: ldp x26, x27, [sp], #16 -; Inst 10: ldr x28, [sp], #16 -; Inst 11: ldp fp, lr, [sp], #16 -; Inst 12: ret -; }} +; stp fp, lr, [sp, #-16]! +; mov fp, sp +; str x28, [sp, #-16]! +; stp x26, x27, [sp, #-16]! +; stp x24, x25, [sp, #-16]! +; block0: +; mov x25, x0 +; mov x4, x1 +; mov x26, x4 +; 1: ldaxrh w27, [x25]; add w28, w27, w26; stlxrh w24, w28, [x25]; cbnz w24, 1b +; ldp x24, x25, [sp], #16 +; ldp x26, x27, [sp], #16 +; ldr x28, [sp], #16 +; ldp fp, lr, [sp], #16 +; ret function %atomic_rmw_add_i8(i64, i8) { block0(v0: i64, v1: i8): @@ -85,25 +73,21 @@ block0(v0: i64, v1: i8): return } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 13) -; Inst 0: stp fp, lr, [sp, #-16]! -; Inst 1: mov fp, sp -; Inst 2: str x28, [sp, #-16]! -; Inst 3: stp x26, x27, [sp, #-16]! -; Inst 4: stp x24, x25, [sp, #-16]! -; Inst 5: mov x25, x0 -; Inst 6: mov x26, x1 -; Inst 7: 1: ldaxrb w27, [x25]; add w28, w27, w26; stlxrb w24, w28, [x25]; cbnz w24, 1b -; Inst 8: ldp x24, x25, [sp], #16 -; Inst 9: ldp x26, x27, [sp], #16 -; Inst 10: ldr x28, [sp], #16 -; Inst 11: ldp fp, lr, [sp], #16 -; Inst 12: ret -; }} +; stp fp, lr, [sp, #-16]! +; mov fp, sp +; str x28, [sp, #-16]! +; stp x26, x27, [sp, #-16]! +; stp x24, x25, [sp, #-16]! +; block0: +; mov x25, x0 +; mov x4, x1 +; mov x26, x4 +; 1: ldaxrb w27, [x25]; add w28, w27, w26; stlxrb w24, w28, [x25]; cbnz w24, 1b +; ldp x24, x25, [sp], #16 +; ldp x26, x27, [sp], #16 +; ldr x28, [sp], #16 +; ldp fp, lr, [sp], #16 +; ret function %atomic_rmw_and_i64(i64, i64) { block0(v0: i64, v1: i64): @@ -111,25 +95,21 @@ block0(v0: i64, v1: i64): return } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 13) -; Inst 0: stp fp, lr, [sp, #-16]! -; Inst 1: mov fp, sp -; Inst 2: str x28, [sp, #-16]! -; Inst 3: stp x26, x27, [sp, #-16]! -; Inst 4: stp x24, x25, [sp, #-16]! -; Inst 5: mov x25, x0 -; Inst 6: mov x26, x1 -; Inst 7: 1: ldaxr x27, [x25]; and x28, x27, x26; stlxr w24, x28, [x25]; cbnz w24, 1b -; Inst 8: ldp x24, x25, [sp], #16 -; Inst 9: ldp x26, x27, [sp], #16 -; Inst 10: ldr x28, [sp], #16 -; Inst 11: ldp fp, lr, [sp], #16 -; Inst 12: ret -; }} +; stp fp, lr, [sp, #-16]! +; mov fp, sp +; str x28, [sp, #-16]! +; stp x26, x27, [sp, #-16]! +; stp x24, x25, [sp, #-16]! +; block0: +; mov x25, x0 +; mov x4, x1 +; mov x26, x4 +; 1: ldaxr x27, [x25]; and x28, x27, x26; stlxr w24, x28, [x25]; cbnz w24, 1b +; ldp x24, x25, [sp], #16 +; ldp x26, x27, [sp], #16 +; ldr x28, [sp], #16 +; ldp fp, lr, [sp], #16 +; ret function %atomic_rmw_and_i32(i64, i32) { block0(v0: i64, v1: i32): @@ -137,25 +117,21 @@ block0(v0: i64, v1: i32): return } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 13) -; Inst 0: stp fp, lr, [sp, #-16]! -; Inst 1: mov fp, sp -; Inst 2: str x28, [sp, #-16]! -; Inst 3: stp x26, x27, [sp, #-16]! -; Inst 4: stp x24, x25, [sp, #-16]! -; Inst 5: mov x25, x0 -; Inst 6: mov x26, x1 -; Inst 7: 1: ldaxr w27, [x25]; and w28, w27, w26; stlxr w24, w28, [x25]; cbnz w24, 1b -; Inst 8: ldp x24, x25, [sp], #16 -; Inst 9: ldp x26, x27, [sp], #16 -; Inst 10: ldr x28, [sp], #16 -; Inst 11: ldp fp, lr, [sp], #16 -; Inst 12: ret -; }} +; stp fp, lr, [sp, #-16]! +; mov fp, sp +; str x28, [sp, #-16]! 
+; stp x26, x27, [sp, #-16]! +; stp x24, x25, [sp, #-16]! +; block0: +; mov x25, x0 +; mov x4, x1 +; mov x26, x4 +; 1: ldaxr w27, [x25]; and w28, w27, w26; stlxr w24, w28, [x25]; cbnz w24, 1b +; ldp x24, x25, [sp], #16 +; ldp x26, x27, [sp], #16 +; ldr x28, [sp], #16 +; ldp fp, lr, [sp], #16 +; ret function %atomic_rmw_and_i16(i64, i16) { block0(v0: i64, v1: i16): @@ -163,25 +139,21 @@ block0(v0: i64, v1: i16): return } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 13) -; Inst 0: stp fp, lr, [sp, #-16]! -; Inst 1: mov fp, sp -; Inst 2: str x28, [sp, #-16]! -; Inst 3: stp x26, x27, [sp, #-16]! -; Inst 4: stp x24, x25, [sp, #-16]! -; Inst 5: mov x25, x0 -; Inst 6: mov x26, x1 -; Inst 7: 1: ldaxrh w27, [x25]; and w28, w27, w26; stlxrh w24, w28, [x25]; cbnz w24, 1b -; Inst 8: ldp x24, x25, [sp], #16 -; Inst 9: ldp x26, x27, [sp], #16 -; Inst 10: ldr x28, [sp], #16 -; Inst 11: ldp fp, lr, [sp], #16 -; Inst 12: ret -; }} +; stp fp, lr, [sp, #-16]! +; mov fp, sp +; str x28, [sp, #-16]! +; stp x26, x27, [sp, #-16]! +; stp x24, x25, [sp, #-16]! +; block0: +; mov x25, x0 +; mov x4, x1 +; mov x26, x4 +; 1: ldaxrh w27, [x25]; and w28, w27, w26; stlxrh w24, w28, [x25]; cbnz w24, 1b +; ldp x24, x25, [sp], #16 +; ldp x26, x27, [sp], #16 +; ldr x28, [sp], #16 +; ldp fp, lr, [sp], #16 +; ret function %atomic_rmw_and_i8(i64, i8) { block0(v0: i64, v1: i8): @@ -189,25 +161,21 @@ block0(v0: i64, v1: i8): return } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 13) -; Inst 0: stp fp, lr, [sp, #-16]! -; Inst 1: mov fp, sp -; Inst 2: str x28, [sp, #-16]! -; Inst 3: stp x26, x27, [sp, #-16]! -; Inst 4: stp x24, x25, [sp, #-16]! -; Inst 5: mov x25, x0 -; Inst 6: mov x26, x1 -; Inst 7: 1: ldaxrb w27, [x25]; and w28, w27, w26; stlxrb w24, w28, [x25]; cbnz w24, 1b -; Inst 8: ldp x24, x25, [sp], #16 -; Inst 9: ldp x26, x27, [sp], #16 -; Inst 10: ldr x28, [sp], #16 -; Inst 11: ldp fp, lr, [sp], #16 -; Inst 12: ret -; }} +; stp fp, lr, [sp, #-16]! +; mov fp, sp +; str x28, [sp, #-16]! +; stp x26, x27, [sp, #-16]! +; stp x24, x25, [sp, #-16]! +; block0: +; mov x25, x0 +; mov x4, x1 +; mov x26, x4 +; 1: ldaxrb w27, [x25]; and w28, w27, w26; stlxrb w24, w28, [x25]; cbnz w24, 1b +; ldp x24, x25, [sp], #16 +; ldp x26, x27, [sp], #16 +; ldr x28, [sp], #16 +; ldp fp, lr, [sp], #16 +; ret function %atomic_rmw_nand_i64(i64, i64) { block0(v0: i64, v1: i64): @@ -215,25 +183,21 @@ block0(v0: i64, v1: i64): return } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 13) -; Inst 0: stp fp, lr, [sp, #-16]! -; Inst 1: mov fp, sp -; Inst 2: str x28, [sp, #-16]! -; Inst 3: stp x26, x27, [sp, #-16]! -; Inst 4: stp x24, x25, [sp, #-16]! -; Inst 5: mov x25, x0 -; Inst 6: mov x26, x1 -; Inst 7: 1: ldaxr x27, [x25]; and x28, x27, x26; mvn x28, x28; stlxr w24, x28, [x25]; cbnz w24, 1b -; Inst 8: ldp x24, x25, [sp], #16 -; Inst 9: ldp x26, x27, [sp], #16 -; Inst 10: ldr x28, [sp], #16 -; Inst 11: ldp fp, lr, [sp], #16 -; Inst 12: ret -; }} +; stp fp, lr, [sp, #-16]! +; mov fp, sp +; str x28, [sp, #-16]! +; stp x26, x27, [sp, #-16]! +; stp x24, x25, [sp, #-16]! 
+; block0: +; mov x25, x0 +; mov x4, x1 +; mov x26, x4 +; 1: ldaxr x27, [x25]; and x28, x27, x26; mvn x28, x28; stlxr w24, x28, [x25]; cbnz w24, 1b +; ldp x24, x25, [sp], #16 +; ldp x26, x27, [sp], #16 +; ldr x28, [sp], #16 +; ldp fp, lr, [sp], #16 +; ret function %atomic_rmw_nand_i32(i64, i32) { block0(v0: i64, v1: i32): @@ -241,25 +205,21 @@ block0(v0: i64, v1: i32): return } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 13) -; Inst 0: stp fp, lr, [sp, #-16]! -; Inst 1: mov fp, sp -; Inst 2: str x28, [sp, #-16]! -; Inst 3: stp x26, x27, [sp, #-16]! -; Inst 4: stp x24, x25, [sp, #-16]! -; Inst 5: mov x25, x0 -; Inst 6: mov x26, x1 -; Inst 7: 1: ldaxr w27, [x25]; and w28, w27, w26; mvn w28, w28; stlxr w24, w28, [x25]; cbnz w24, 1b -; Inst 8: ldp x24, x25, [sp], #16 -; Inst 9: ldp x26, x27, [sp], #16 -; Inst 10: ldr x28, [sp], #16 -; Inst 11: ldp fp, lr, [sp], #16 -; Inst 12: ret -; }} +; stp fp, lr, [sp, #-16]! +; mov fp, sp +; str x28, [sp, #-16]! +; stp x26, x27, [sp, #-16]! +; stp x24, x25, [sp, #-16]! +; block0: +; mov x25, x0 +; mov x4, x1 +; mov x26, x4 +; 1: ldaxr w27, [x25]; and w28, w27, w26; mvn w28, w28; stlxr w24, w28, [x25]; cbnz w24, 1b +; ldp x24, x25, [sp], #16 +; ldp x26, x27, [sp], #16 +; ldr x28, [sp], #16 +; ldp fp, lr, [sp], #16 +; ret function %atomic_rmw_nand_i16(i64, i16) { block0(v0: i64, v1: i16): @@ -267,25 +227,21 @@ block0(v0: i64, v1: i16): return } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 13) -; Inst 0: stp fp, lr, [sp, #-16]! -; Inst 1: mov fp, sp -; Inst 2: str x28, [sp, #-16]! -; Inst 3: stp x26, x27, [sp, #-16]! -; Inst 4: stp x24, x25, [sp, #-16]! -; Inst 5: mov x25, x0 -; Inst 6: mov x26, x1 -; Inst 7: 1: ldaxrh w27, [x25]; and w28, w27, w26; mvn w28, w28; stlxrh w24, w28, [x25]; cbnz w24, 1b -; Inst 8: ldp x24, x25, [sp], #16 -; Inst 9: ldp x26, x27, [sp], #16 -; Inst 10: ldr x28, [sp], #16 -; Inst 11: ldp fp, lr, [sp], #16 -; Inst 12: ret -; }} +; stp fp, lr, [sp, #-16]! +; mov fp, sp +; str x28, [sp, #-16]! +; stp x26, x27, [sp, #-16]! +; stp x24, x25, [sp, #-16]! +; block0: +; mov x25, x0 +; mov x4, x1 +; mov x26, x4 +; 1: ldaxrh w27, [x25]; and w28, w27, w26; mvn w28, w28; stlxrh w24, w28, [x25]; cbnz w24, 1b +; ldp x24, x25, [sp], #16 +; ldp x26, x27, [sp], #16 +; ldr x28, [sp], #16 +; ldp fp, lr, [sp], #16 +; ret function %atomic_rmw_nand_i8(i64, i8) { block0(v0: i64, v1: i8): @@ -293,25 +249,21 @@ block0(v0: i64, v1: i8): return } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 13) -; Inst 0: stp fp, lr, [sp, #-16]! -; Inst 1: mov fp, sp -; Inst 2: str x28, [sp, #-16]! -; Inst 3: stp x26, x27, [sp, #-16]! -; Inst 4: stp x24, x25, [sp, #-16]! -; Inst 5: mov x25, x0 -; Inst 6: mov x26, x1 -; Inst 7: 1: ldaxrb w27, [x25]; and w28, w27, w26; mvn w28, w28; stlxrb w24, w28, [x25]; cbnz w24, 1b -; Inst 8: ldp x24, x25, [sp], #16 -; Inst 9: ldp x26, x27, [sp], #16 -; Inst 10: ldr x28, [sp], #16 -; Inst 11: ldp fp, lr, [sp], #16 -; Inst 12: ret -; }} +; stp fp, lr, [sp, #-16]! +; mov fp, sp +; str x28, [sp, #-16]! +; stp x26, x27, [sp, #-16]! +; stp x24, x25, [sp, #-16]! 
+; block0: +; mov x25, x0 +; mov x4, x1 +; mov x26, x4 +; 1: ldaxrb w27, [x25]; and w28, w27, w26; mvn w28, w28; stlxrb w24, w28, [x25]; cbnz w24, 1b +; ldp x24, x25, [sp], #16 +; ldp x26, x27, [sp], #16 +; ldr x28, [sp], #16 +; ldp fp, lr, [sp], #16 +; ret function %atomic_rmw_or_i64(i64, i64) { block0(v0: i64, v1: i64): @@ -319,25 +271,21 @@ block0(v0: i64, v1: i64): return } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 13) -; Inst 0: stp fp, lr, [sp, #-16]! -; Inst 1: mov fp, sp -; Inst 2: str x28, [sp, #-16]! -; Inst 3: stp x26, x27, [sp, #-16]! -; Inst 4: stp x24, x25, [sp, #-16]! -; Inst 5: mov x25, x0 -; Inst 6: mov x26, x1 -; Inst 7: 1: ldaxr x27, [x25]; orr x28, x27, x26; stlxr w24, x28, [x25]; cbnz w24, 1b -; Inst 8: ldp x24, x25, [sp], #16 -; Inst 9: ldp x26, x27, [sp], #16 -; Inst 10: ldr x28, [sp], #16 -; Inst 11: ldp fp, lr, [sp], #16 -; Inst 12: ret -; }} +; stp fp, lr, [sp, #-16]! +; mov fp, sp +; str x28, [sp, #-16]! +; stp x26, x27, [sp, #-16]! +; stp x24, x25, [sp, #-16]! +; block0: +; mov x25, x0 +; mov x4, x1 +; mov x26, x4 +; 1: ldaxr x27, [x25]; orr x28, x27, x26; stlxr w24, x28, [x25]; cbnz w24, 1b +; ldp x24, x25, [sp], #16 +; ldp x26, x27, [sp], #16 +; ldr x28, [sp], #16 +; ldp fp, lr, [sp], #16 +; ret function %atomic_rmw_or_i32(i64, i32) { block0(v0: i64, v1: i32): @@ -345,25 +293,21 @@ block0(v0: i64, v1: i32): return } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 13) -; Inst 0: stp fp, lr, [sp, #-16]! -; Inst 1: mov fp, sp -; Inst 2: str x28, [sp, #-16]! -; Inst 3: stp x26, x27, [sp, #-16]! -; Inst 4: stp x24, x25, [sp, #-16]! -; Inst 5: mov x25, x0 -; Inst 6: mov x26, x1 -; Inst 7: 1: ldaxr w27, [x25]; orr w28, w27, w26; stlxr w24, w28, [x25]; cbnz w24, 1b -; Inst 8: ldp x24, x25, [sp], #16 -; Inst 9: ldp x26, x27, [sp], #16 -; Inst 10: ldr x28, [sp], #16 -; Inst 11: ldp fp, lr, [sp], #16 -; Inst 12: ret -; }} +; stp fp, lr, [sp, #-16]! +; mov fp, sp +; str x28, [sp, #-16]! +; stp x26, x27, [sp, #-16]! +; stp x24, x25, [sp, #-16]! +; block0: +; mov x25, x0 +; mov x4, x1 +; mov x26, x4 +; 1: ldaxr w27, [x25]; orr w28, w27, w26; stlxr w24, w28, [x25]; cbnz w24, 1b +; ldp x24, x25, [sp], #16 +; ldp x26, x27, [sp], #16 +; ldr x28, [sp], #16 +; ldp fp, lr, [sp], #16 +; ret function %atomic_rmw_or_i16(i64, i16) { block0(v0: i64, v1: i16): @@ -371,25 +315,21 @@ block0(v0: i64, v1: i16): return } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 13) -; Inst 0: stp fp, lr, [sp, #-16]! -; Inst 1: mov fp, sp -; Inst 2: str x28, [sp, #-16]! -; Inst 3: stp x26, x27, [sp, #-16]! -; Inst 4: stp x24, x25, [sp, #-16]! -; Inst 5: mov x25, x0 -; Inst 6: mov x26, x1 -; Inst 7: 1: ldaxrh w27, [x25]; orr w28, w27, w26; stlxrh w24, w28, [x25]; cbnz w24, 1b -; Inst 8: ldp x24, x25, [sp], #16 -; Inst 9: ldp x26, x27, [sp], #16 -; Inst 10: ldr x28, [sp], #16 -; Inst 11: ldp fp, lr, [sp], #16 -; Inst 12: ret -; }} +; stp fp, lr, [sp, #-16]! +; mov fp, sp +; str x28, [sp, #-16]! +; stp x26, x27, [sp, #-16]! +; stp x24, x25, [sp, #-16]! 
+; block0: +; mov x25, x0 +; mov x4, x1 +; mov x26, x4 +; 1: ldaxrh w27, [x25]; orr w28, w27, w26; stlxrh w24, w28, [x25]; cbnz w24, 1b +; ldp x24, x25, [sp], #16 +; ldp x26, x27, [sp], #16 +; ldr x28, [sp], #16 +; ldp fp, lr, [sp], #16 +; ret function %atomic_rmw_or_i8(i64, i8) { block0(v0: i64, v1: i8): @@ -397,25 +337,21 @@ block0(v0: i64, v1: i8): return } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 13) -; Inst 0: stp fp, lr, [sp, #-16]! -; Inst 1: mov fp, sp -; Inst 2: str x28, [sp, #-16]! -; Inst 3: stp x26, x27, [sp, #-16]! -; Inst 4: stp x24, x25, [sp, #-16]! -; Inst 5: mov x25, x0 -; Inst 6: mov x26, x1 -; Inst 7: 1: ldaxrb w27, [x25]; orr w28, w27, w26; stlxrb w24, w28, [x25]; cbnz w24, 1b -; Inst 8: ldp x24, x25, [sp], #16 -; Inst 9: ldp x26, x27, [sp], #16 -; Inst 10: ldr x28, [sp], #16 -; Inst 11: ldp fp, lr, [sp], #16 -; Inst 12: ret -; }} +; stp fp, lr, [sp, #-16]! +; mov fp, sp +; str x28, [sp, #-16]! +; stp x26, x27, [sp, #-16]! +; stp x24, x25, [sp, #-16]! +; block0: +; mov x25, x0 +; mov x4, x1 +; mov x26, x4 +; 1: ldaxrb w27, [x25]; orr w28, w27, w26; stlxrb w24, w28, [x25]; cbnz w24, 1b +; ldp x24, x25, [sp], #16 +; ldp x26, x27, [sp], #16 +; ldr x28, [sp], #16 +; ldp fp, lr, [sp], #16 +; ret function %atomic_rmw_xor_i64(i64, i64) { block0(v0: i64, v1: i64): @@ -423,25 +359,21 @@ block0(v0: i64, v1: i64): return } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 13) -; Inst 0: stp fp, lr, [sp, #-16]! -; Inst 1: mov fp, sp -; Inst 2: str x28, [sp, #-16]! -; Inst 3: stp x26, x27, [sp, #-16]! -; Inst 4: stp x24, x25, [sp, #-16]! -; Inst 5: mov x25, x0 -; Inst 6: mov x26, x1 -; Inst 7: 1: ldaxr x27, [x25]; eor x28, x27, x26; stlxr w24, x28, [x25]; cbnz w24, 1b -; Inst 8: ldp x24, x25, [sp], #16 -; Inst 9: ldp x26, x27, [sp], #16 -; Inst 10: ldr x28, [sp], #16 -; Inst 11: ldp fp, lr, [sp], #16 -; Inst 12: ret -; }} +; stp fp, lr, [sp, #-16]! +; mov fp, sp +; str x28, [sp, #-16]! +; stp x26, x27, [sp, #-16]! +; stp x24, x25, [sp, #-16]! +; block0: +; mov x25, x0 +; mov x4, x1 +; mov x26, x4 +; 1: ldaxr x27, [x25]; eor x28, x27, x26; stlxr w24, x28, [x25]; cbnz w24, 1b +; ldp x24, x25, [sp], #16 +; ldp x26, x27, [sp], #16 +; ldr x28, [sp], #16 +; ldp fp, lr, [sp], #16 +; ret function %atomic_rmw_xor_i32(i64, i32) { block0(v0: i64, v1: i32): @@ -449,25 +381,21 @@ block0(v0: i64, v1: i32): return } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 13) -; Inst 0: stp fp, lr, [sp, #-16]! -; Inst 1: mov fp, sp -; Inst 2: str x28, [sp, #-16]! -; Inst 3: stp x26, x27, [sp, #-16]! -; Inst 4: stp x24, x25, [sp, #-16]! -; Inst 5: mov x25, x0 -; Inst 6: mov x26, x1 -; Inst 7: 1: ldaxr w27, [x25]; eor w28, w27, w26; stlxr w24, w28, [x25]; cbnz w24, 1b -; Inst 8: ldp x24, x25, [sp], #16 -; Inst 9: ldp x26, x27, [sp], #16 -; Inst 10: ldr x28, [sp], #16 -; Inst 11: ldp fp, lr, [sp], #16 -; Inst 12: ret -; }} +; stp fp, lr, [sp, #-16]! +; mov fp, sp +; str x28, [sp, #-16]! +; stp x26, x27, [sp, #-16]! +; stp x24, x25, [sp, #-16]! 
+; block0: +; mov x25, x0 +; mov x4, x1 +; mov x26, x4 +; 1: ldaxr w27, [x25]; eor w28, w27, w26; stlxr w24, w28, [x25]; cbnz w24, 1b +; ldp x24, x25, [sp], #16 +; ldp x26, x27, [sp], #16 +; ldr x28, [sp], #16 +; ldp fp, lr, [sp], #16 +; ret function %atomic_rmw_xor_i16(i64, i16) { block0(v0: i64, v1: i16): @@ -475,25 +403,21 @@ block0(v0: i64, v1: i16): return } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 13) -; Inst 0: stp fp, lr, [sp, #-16]! -; Inst 1: mov fp, sp -; Inst 2: str x28, [sp, #-16]! -; Inst 3: stp x26, x27, [sp, #-16]! -; Inst 4: stp x24, x25, [sp, #-16]! -; Inst 5: mov x25, x0 -; Inst 6: mov x26, x1 -; Inst 7: 1: ldaxrh w27, [x25]; eor w28, w27, w26; stlxrh w24, w28, [x25]; cbnz w24, 1b -; Inst 8: ldp x24, x25, [sp], #16 -; Inst 9: ldp x26, x27, [sp], #16 -; Inst 10: ldr x28, [sp], #16 -; Inst 11: ldp fp, lr, [sp], #16 -; Inst 12: ret -; }} +; stp fp, lr, [sp, #-16]! +; mov fp, sp +; str x28, [sp, #-16]! +; stp x26, x27, [sp, #-16]! +; stp x24, x25, [sp, #-16]! +; block0: +; mov x25, x0 +; mov x4, x1 +; mov x26, x4 +; 1: ldaxrh w27, [x25]; eor w28, w27, w26; stlxrh w24, w28, [x25]; cbnz w24, 1b +; ldp x24, x25, [sp], #16 +; ldp x26, x27, [sp], #16 +; ldr x28, [sp], #16 +; ldp fp, lr, [sp], #16 +; ret function %atomic_rmw_xor_i8(i64, i8) { block0(v0: i64, v1: i8): @@ -501,25 +425,21 @@ block0(v0: i64, v1: i8): return } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 13) -; Inst 0: stp fp, lr, [sp, #-16]! -; Inst 1: mov fp, sp -; Inst 2: str x28, [sp, #-16]! -; Inst 3: stp x26, x27, [sp, #-16]! -; Inst 4: stp x24, x25, [sp, #-16]! -; Inst 5: mov x25, x0 -; Inst 6: mov x26, x1 -; Inst 7: 1: ldaxrb w27, [x25]; eor w28, w27, w26; stlxrb w24, w28, [x25]; cbnz w24, 1b -; Inst 8: ldp x24, x25, [sp], #16 -; Inst 9: ldp x26, x27, [sp], #16 -; Inst 10: ldr x28, [sp], #16 -; Inst 11: ldp fp, lr, [sp], #16 -; Inst 12: ret -; }} +; stp fp, lr, [sp, #-16]! +; mov fp, sp +; str x28, [sp, #-16]! +; stp x26, x27, [sp, #-16]! +; stp x24, x25, [sp, #-16]! +; block0: +; mov x25, x0 +; mov x4, x1 +; mov x26, x4 +; 1: ldaxrb w27, [x25]; eor w28, w27, w26; stlxrb w24, w28, [x25]; cbnz w24, 1b +; ldp x24, x25, [sp], #16 +; ldp x26, x27, [sp], #16 +; ldr x28, [sp], #16 +; ldp fp, lr, [sp], #16 +; ret function %atomic_rmw_smax_i64(i64, i64) { block0(v0: i64, v1: i64): @@ -527,25 +447,21 @@ block0(v0: i64, v1: i64): return } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 13) -; Inst 0: stp fp, lr, [sp, #-16]! -; Inst 1: mov fp, sp -; Inst 2: str x28, [sp, #-16]! -; Inst 3: stp x26, x27, [sp, #-16]! -; Inst 4: stp x24, x25, [sp, #-16]! -; Inst 5: mov x25, x0 -; Inst 6: mov x26, x1 -; Inst 7: 1: ldaxr x27, [x25]; cmp x27, x26; csel x28, x27, x26, gt; stlxr w24, x28, [x25]; cbnz w24, 1b -; Inst 8: ldp x24, x25, [sp], #16 -; Inst 9: ldp x26, x27, [sp], #16 -; Inst 10: ldr x28, [sp], #16 -; Inst 11: ldp fp, lr, [sp], #16 -; Inst 12: ret -; }} +; stp fp, lr, [sp, #-16]! +; mov fp, sp +; str x28, [sp, #-16]! +; stp x26, x27, [sp, #-16]! +; stp x24, x25, [sp, #-16]! 
+; block0: +; mov x25, x0 +; mov x4, x1 +; mov x26, x4 +; 1: ldaxr x27, [x25]; cmp x27, x26; csel x28, x27, x26, gt; stlxr w24, x28, [x25]; cbnz w24, 1b +; ldp x24, x25, [sp], #16 +; ldp x26, x27, [sp], #16 +; ldr x28, [sp], #16 +; ldp fp, lr, [sp], #16 +; ret function %atomic_rmw_smax_i32(i64, i32) { block0(v0: i64, v1: i32): @@ -553,25 +469,21 @@ block0(v0: i64, v1: i32): return } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 13) -; Inst 0: stp fp, lr, [sp, #-16]! -; Inst 1: mov fp, sp -; Inst 2: str x28, [sp, #-16]! -; Inst 3: stp x26, x27, [sp, #-16]! -; Inst 4: stp x24, x25, [sp, #-16]! -; Inst 5: mov x25, x0 -; Inst 6: mov x26, x1 -; Inst 7: 1: ldaxr w27, [x25]; cmp w27, w26; csel w28, w27, w26, gt; stlxr w24, w28, [x25]; cbnz w24, 1b -; Inst 8: ldp x24, x25, [sp], #16 -; Inst 9: ldp x26, x27, [sp], #16 -; Inst 10: ldr x28, [sp], #16 -; Inst 11: ldp fp, lr, [sp], #16 -; Inst 12: ret -; }} +; stp fp, lr, [sp, #-16]! +; mov fp, sp +; str x28, [sp, #-16]! +; stp x26, x27, [sp, #-16]! +; stp x24, x25, [sp, #-16]! +; block0: +; mov x25, x0 +; mov x4, x1 +; mov x26, x4 +; 1: ldaxr w27, [x25]; cmp w27, w26; csel w28, w27, w26, gt; stlxr w24, w28, [x25]; cbnz w24, 1b +; ldp x24, x25, [sp], #16 +; ldp x26, x27, [sp], #16 +; ldr x28, [sp], #16 +; ldp fp, lr, [sp], #16 +; ret function %atomic_rmw_smax_i16(i64, i16) { block0(v0: i64, v1: i16): @@ -579,25 +491,21 @@ block0(v0: i64, v1: i16): return } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 13) -; Inst 0: stp fp, lr, [sp, #-16]! -; Inst 1: mov fp, sp -; Inst 2: str x28, [sp, #-16]! -; Inst 3: stp x26, x27, [sp, #-16]! -; Inst 4: stp x24, x25, [sp, #-16]! -; Inst 5: mov x25, x0 -; Inst 6: mov x26, x1 -; Inst 7: 1: ldaxrh w27, [x25]; cmp w27, w26; csel w28, w27, w26, gt; stlxrh w24, w28, [x25]; cbnz w24, 1b -; Inst 8: ldp x24, x25, [sp], #16 -; Inst 9: ldp x26, x27, [sp], #16 -; Inst 10: ldr x28, [sp], #16 -; Inst 11: ldp fp, lr, [sp], #16 -; Inst 12: ret -; }} +; stp fp, lr, [sp, #-16]! +; mov fp, sp +; str x28, [sp, #-16]! +; stp x26, x27, [sp, #-16]! +; stp x24, x25, [sp, #-16]! +; block0: +; mov x25, x0 +; mov x4, x1 +; mov x26, x4 +; 1: ldaxrh w27, [x25]; cmp w27, w26; csel w28, w27, w26, gt; stlxrh w24, w28, [x25]; cbnz w24, 1b +; ldp x24, x25, [sp], #16 +; ldp x26, x27, [sp], #16 +; ldr x28, [sp], #16 +; ldp fp, lr, [sp], #16 +; ret function %atomic_rmw_smax_i8(i64, i8) { block0(v0: i64, v1: i8): @@ -605,25 +513,21 @@ block0(v0: i64, v1: i8): return } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 13) -; Inst 0: stp fp, lr, [sp, #-16]! -; Inst 1: mov fp, sp -; Inst 2: str x28, [sp, #-16]! -; Inst 3: stp x26, x27, [sp, #-16]! -; Inst 4: stp x24, x25, [sp, #-16]! -; Inst 5: mov x25, x0 -; Inst 6: mov x26, x1 -; Inst 7: 1: ldaxrb w27, [x25]; cmp w27, w26; csel w28, w27, w26, gt; stlxrb w24, w28, [x25]; cbnz w24, 1b -; Inst 8: ldp x24, x25, [sp], #16 -; Inst 9: ldp x26, x27, [sp], #16 -; Inst 10: ldr x28, [sp], #16 -; Inst 11: ldp fp, lr, [sp], #16 -; Inst 12: ret -; }} +; stp fp, lr, [sp, #-16]! +; mov fp, sp +; str x28, [sp, #-16]! +; stp x26, x27, [sp, #-16]! +; stp x24, x25, [sp, #-16]! 
+; block0: +; mov x25, x0 +; mov x4, x1 +; mov x26, x4 +; 1: ldaxrb w27, [x25]; cmp w27, w26; csel w28, w27, w26, gt; stlxrb w24, w28, [x25]; cbnz w24, 1b +; ldp x24, x25, [sp], #16 +; ldp x26, x27, [sp], #16 +; ldr x28, [sp], #16 +; ldp fp, lr, [sp], #16 +; ret function %atomic_rmw_umax_i64(i64, i64) { block0(v0: i64, v1: i64): @@ -631,25 +535,21 @@ block0(v0: i64, v1: i64): return } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 13) -; Inst 0: stp fp, lr, [sp, #-16]! -; Inst 1: mov fp, sp -; Inst 2: str x28, [sp, #-16]! -; Inst 3: stp x26, x27, [sp, #-16]! -; Inst 4: stp x24, x25, [sp, #-16]! -; Inst 5: mov x25, x0 -; Inst 6: mov x26, x1 -; Inst 7: 1: ldaxr x27, [x25]; cmp x27, x26; csel x28, x27, x26, hi; stlxr w24, x28, [x25]; cbnz w24, 1b -; Inst 8: ldp x24, x25, [sp], #16 -; Inst 9: ldp x26, x27, [sp], #16 -; Inst 10: ldr x28, [sp], #16 -; Inst 11: ldp fp, lr, [sp], #16 -; Inst 12: ret -; }} +; stp fp, lr, [sp, #-16]! +; mov fp, sp +; str x28, [sp, #-16]! +; stp x26, x27, [sp, #-16]! +; stp x24, x25, [sp, #-16]! +; block0: +; mov x25, x0 +; mov x4, x1 +; mov x26, x4 +; 1: ldaxr x27, [x25]; cmp x27, x26; csel x28, x27, x26, hi; stlxr w24, x28, [x25]; cbnz w24, 1b +; ldp x24, x25, [sp], #16 +; ldp x26, x27, [sp], #16 +; ldr x28, [sp], #16 +; ldp fp, lr, [sp], #16 +; ret function %atomic_rmw_umax_i32(i64, i32) { block0(v0: i64, v1: i32): @@ -657,25 +557,21 @@ block0(v0: i64, v1: i32): return } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 13) -; Inst 0: stp fp, lr, [sp, #-16]! -; Inst 1: mov fp, sp -; Inst 2: str x28, [sp, #-16]! -; Inst 3: stp x26, x27, [sp, #-16]! -; Inst 4: stp x24, x25, [sp, #-16]! -; Inst 5: mov x25, x0 -; Inst 6: mov x26, x1 -; Inst 7: 1: ldaxr w27, [x25]; cmp w27, w26; csel w28, w27, w26, hi; stlxr w24, w28, [x25]; cbnz w24, 1b -; Inst 8: ldp x24, x25, [sp], #16 -; Inst 9: ldp x26, x27, [sp], #16 -; Inst 10: ldr x28, [sp], #16 -; Inst 11: ldp fp, lr, [sp], #16 -; Inst 12: ret -; }} +; stp fp, lr, [sp, #-16]! +; mov fp, sp +; str x28, [sp, #-16]! +; stp x26, x27, [sp, #-16]! +; stp x24, x25, [sp, #-16]! +; block0: +; mov x25, x0 +; mov x4, x1 +; mov x26, x4 +; 1: ldaxr w27, [x25]; cmp w27, w26; csel w28, w27, w26, hi; stlxr w24, w28, [x25]; cbnz w24, 1b +; ldp x24, x25, [sp], #16 +; ldp x26, x27, [sp], #16 +; ldr x28, [sp], #16 +; ldp fp, lr, [sp], #16 +; ret function %atomic_rmw_umax_i16(i64, i16) { block0(v0: i64, v1: i16): @@ -683,25 +579,21 @@ block0(v0: i64, v1: i16): return } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 13) -; Inst 0: stp fp, lr, [sp, #-16]! -; Inst 1: mov fp, sp -; Inst 2: str x28, [sp, #-16]! -; Inst 3: stp x26, x27, [sp, #-16]! -; Inst 4: stp x24, x25, [sp, #-16]! -; Inst 5: mov x25, x0 -; Inst 6: mov x26, x1 -; Inst 7: 1: ldaxrh w27, [x25]; cmp w27, w26; csel w28, w27, w26, hi; stlxrh w24, w28, [x25]; cbnz w24, 1b -; Inst 8: ldp x24, x25, [sp], #16 -; Inst 9: ldp x26, x27, [sp], #16 -; Inst 10: ldr x28, [sp], #16 -; Inst 11: ldp fp, lr, [sp], #16 -; Inst 12: ret -; }} +; stp fp, lr, [sp, #-16]! +; mov fp, sp +; str x28, [sp, #-16]! +; stp x26, x27, [sp, #-16]! +; stp x24, x25, [sp, #-16]! 
+; block0: +; mov x25, x0 +; mov x4, x1 +; mov x26, x4 +; 1: ldaxrh w27, [x25]; cmp w27, w26; csel w28, w27, w26, hi; stlxrh w24, w28, [x25]; cbnz w24, 1b +; ldp x24, x25, [sp], #16 +; ldp x26, x27, [sp], #16 +; ldr x28, [sp], #16 +; ldp fp, lr, [sp], #16 +; ret function %atomic_rmw_umax_i8(i64, i8) { block0(v0: i64, v1: i8): @@ -709,25 +601,21 @@ block0(v0: i64, v1: i8): return } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 13) -; Inst 0: stp fp, lr, [sp, #-16]! -; Inst 1: mov fp, sp -; Inst 2: str x28, [sp, #-16]! -; Inst 3: stp x26, x27, [sp, #-16]! -; Inst 4: stp x24, x25, [sp, #-16]! -; Inst 5: mov x25, x0 -; Inst 6: mov x26, x1 -; Inst 7: 1: ldaxrb w27, [x25]; cmp w27, w26; csel w28, w27, w26, hi; stlxrb w24, w28, [x25]; cbnz w24, 1b -; Inst 8: ldp x24, x25, [sp], #16 -; Inst 9: ldp x26, x27, [sp], #16 -; Inst 10: ldr x28, [sp], #16 -; Inst 11: ldp fp, lr, [sp], #16 -; Inst 12: ret -; }} +; stp fp, lr, [sp, #-16]! +; mov fp, sp +; str x28, [sp, #-16]! +; stp x26, x27, [sp, #-16]! +; stp x24, x25, [sp, #-16]! +; block0: +; mov x25, x0 +; mov x4, x1 +; mov x26, x4 +; 1: ldaxrb w27, [x25]; cmp w27, w26; csel w28, w27, w26, hi; stlxrb w24, w28, [x25]; cbnz w24, 1b +; ldp x24, x25, [sp], #16 +; ldp x26, x27, [sp], #16 +; ldr x28, [sp], #16 +; ldp fp, lr, [sp], #16 +; ret function %atomic_rmw_smin_i64(i64, i64) { block0(v0: i64, v1: i64): @@ -735,25 +623,21 @@ block0(v0: i64, v1: i64): return } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 13) -; Inst 0: stp fp, lr, [sp, #-16]! -; Inst 1: mov fp, sp -; Inst 2: str x28, [sp, #-16]! -; Inst 3: stp x26, x27, [sp, #-16]! -; Inst 4: stp x24, x25, [sp, #-16]! -; Inst 5: mov x25, x0 -; Inst 6: mov x26, x1 -; Inst 7: 1: ldaxr x27, [x25]; cmp x27, x26; csel x28, x27, x26, lt; stlxr w24, x28, [x25]; cbnz w24, 1b -; Inst 8: ldp x24, x25, [sp], #16 -; Inst 9: ldp x26, x27, [sp], #16 -; Inst 10: ldr x28, [sp], #16 -; Inst 11: ldp fp, lr, [sp], #16 -; Inst 12: ret -; }} +; stp fp, lr, [sp, #-16]! +; mov fp, sp +; str x28, [sp, #-16]! +; stp x26, x27, [sp, #-16]! +; stp x24, x25, [sp, #-16]! +; block0: +; mov x25, x0 +; mov x4, x1 +; mov x26, x4 +; 1: ldaxr x27, [x25]; cmp x27, x26; csel x28, x27, x26, lt; stlxr w24, x28, [x25]; cbnz w24, 1b +; ldp x24, x25, [sp], #16 +; ldp x26, x27, [sp], #16 +; ldr x28, [sp], #16 +; ldp fp, lr, [sp], #16 +; ret function %atomic_rmw_smin_i32(i64, i32) { block0(v0: i64, v1: i32): @@ -761,25 +645,21 @@ block0(v0: i64, v1: i32): return } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 13) -; Inst 0: stp fp, lr, [sp, #-16]! -; Inst 1: mov fp, sp -; Inst 2: str x28, [sp, #-16]! -; Inst 3: stp x26, x27, [sp, #-16]! -; Inst 4: stp x24, x25, [sp, #-16]! -; Inst 5: mov x25, x0 -; Inst 6: mov x26, x1 -; Inst 7: 1: ldaxr w27, [x25]; cmp w27, w26; csel w28, w27, w26, lt; stlxr w24, w28, [x25]; cbnz w24, 1b -; Inst 8: ldp x24, x25, [sp], #16 -; Inst 9: ldp x26, x27, [sp], #16 -; Inst 10: ldr x28, [sp], #16 -; Inst 11: ldp fp, lr, [sp], #16 -; Inst 12: ret -; }} +; stp fp, lr, [sp, #-16]! +; mov fp, sp +; str x28, [sp, #-16]! +; stp x26, x27, [sp, #-16]! +; stp x24, x25, [sp, #-16]! 
+; block0: +; mov x25, x0 +; mov x4, x1 +; mov x26, x4 +; 1: ldaxr w27, [x25]; cmp w27, w26; csel w28, w27, w26, lt; stlxr w24, w28, [x25]; cbnz w24, 1b +; ldp x24, x25, [sp], #16 +; ldp x26, x27, [sp], #16 +; ldr x28, [sp], #16 +; ldp fp, lr, [sp], #16 +; ret function %atomic_rmw_smin_i16(i64, i16) { block0(v0: i64, v1: i16): @@ -787,25 +667,21 @@ block0(v0: i64, v1: i16): return } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 13) -; Inst 0: stp fp, lr, [sp, #-16]! -; Inst 1: mov fp, sp -; Inst 2: str x28, [sp, #-16]! -; Inst 3: stp x26, x27, [sp, #-16]! -; Inst 4: stp x24, x25, [sp, #-16]! -; Inst 5: mov x25, x0 -; Inst 6: mov x26, x1 -; Inst 7: 1: ldaxrh w27, [x25]; cmp w27, w26; csel w28, w27, w26, lt; stlxrh w24, w28, [x25]; cbnz w24, 1b -; Inst 8: ldp x24, x25, [sp], #16 -; Inst 9: ldp x26, x27, [sp], #16 -; Inst 10: ldr x28, [sp], #16 -; Inst 11: ldp fp, lr, [sp], #16 -; Inst 12: ret -; }} +; stp fp, lr, [sp, #-16]! +; mov fp, sp +; str x28, [sp, #-16]! +; stp x26, x27, [sp, #-16]! +; stp x24, x25, [sp, #-16]! +; block0: +; mov x25, x0 +; mov x4, x1 +; mov x26, x4 +; 1: ldaxrh w27, [x25]; cmp w27, w26; csel w28, w27, w26, lt; stlxrh w24, w28, [x25]; cbnz w24, 1b +; ldp x24, x25, [sp], #16 +; ldp x26, x27, [sp], #16 +; ldr x28, [sp], #16 +; ldp fp, lr, [sp], #16 +; ret function %atomic_rmw_smin_i8(i64, i8) { block0(v0: i64, v1: i8): @@ -813,25 +689,21 @@ block0(v0: i64, v1: i8): return } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 13) -; Inst 0: stp fp, lr, [sp, #-16]! -; Inst 1: mov fp, sp -; Inst 2: str x28, [sp, #-16]! -; Inst 3: stp x26, x27, [sp, #-16]! -; Inst 4: stp x24, x25, [sp, #-16]! -; Inst 5: mov x25, x0 -; Inst 6: mov x26, x1 -; Inst 7: 1: ldaxrb w27, [x25]; cmp w27, w26; csel w28, w27, w26, lt; stlxrb w24, w28, [x25]; cbnz w24, 1b -; Inst 8: ldp x24, x25, [sp], #16 -; Inst 9: ldp x26, x27, [sp], #16 -; Inst 10: ldr x28, [sp], #16 -; Inst 11: ldp fp, lr, [sp], #16 -; Inst 12: ret -; }} +; stp fp, lr, [sp, #-16]! +; mov fp, sp +; str x28, [sp, #-16]! +; stp x26, x27, [sp, #-16]! +; stp x24, x25, [sp, #-16]! +; block0: +; mov x25, x0 +; mov x4, x1 +; mov x26, x4 +; 1: ldaxrb w27, [x25]; cmp w27, w26; csel w28, w27, w26, lt; stlxrb w24, w28, [x25]; cbnz w24, 1b +; ldp x24, x25, [sp], #16 +; ldp x26, x27, [sp], #16 +; ldr x28, [sp], #16 +; ldp fp, lr, [sp], #16 +; ret function %atomic_rmw_umin_i64(i64, i64) { block0(v0: i64, v1: i64): @@ -839,25 +711,21 @@ block0(v0: i64, v1: i64): return } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 13) -; Inst 0: stp fp, lr, [sp, #-16]! -; Inst 1: mov fp, sp -; Inst 2: str x28, [sp, #-16]! -; Inst 3: stp x26, x27, [sp, #-16]! -; Inst 4: stp x24, x25, [sp, #-16]! -; Inst 5: mov x25, x0 -; Inst 6: mov x26, x1 -; Inst 7: 1: ldaxr x27, [x25]; cmp x27, x26; csel x28, x27, x26, lo; stlxr w24, x28, [x25]; cbnz w24, 1b -; Inst 8: ldp x24, x25, [sp], #16 -; Inst 9: ldp x26, x27, [sp], #16 -; Inst 10: ldr x28, [sp], #16 -; Inst 11: ldp fp, lr, [sp], #16 -; Inst 12: ret -; }} +; stp fp, lr, [sp, #-16]! +; mov fp, sp +; str x28, [sp, #-16]! +; stp x26, x27, [sp, #-16]! +; stp x24, x25, [sp, #-16]! 
+; block0: +; mov x25, x0 +; mov x4, x1 +; mov x26, x4 +; 1: ldaxr x27, [x25]; cmp x27, x26; csel x28, x27, x26, lo; stlxr w24, x28, [x25]; cbnz w24, 1b +; ldp x24, x25, [sp], #16 +; ldp x26, x27, [sp], #16 +; ldr x28, [sp], #16 +; ldp fp, lr, [sp], #16 +; ret function %atomic_rmw_umin_i32(i64, i32) { block0(v0: i64, v1: i32): @@ -865,25 +733,21 @@ block0(v0: i64, v1: i32): return } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 13) -; Inst 0: stp fp, lr, [sp, #-16]! -; Inst 1: mov fp, sp -; Inst 2: str x28, [sp, #-16]! -; Inst 3: stp x26, x27, [sp, #-16]! -; Inst 4: stp x24, x25, [sp, #-16]! -; Inst 5: mov x25, x0 -; Inst 6: mov x26, x1 -; Inst 7: 1: ldaxr w27, [x25]; cmp w27, w26; csel w28, w27, w26, lo; stlxr w24, w28, [x25]; cbnz w24, 1b -; Inst 8: ldp x24, x25, [sp], #16 -; Inst 9: ldp x26, x27, [sp], #16 -; Inst 10: ldr x28, [sp], #16 -; Inst 11: ldp fp, lr, [sp], #16 -; Inst 12: ret -; }} +; stp fp, lr, [sp, #-16]! +; mov fp, sp +; str x28, [sp, #-16]! +; stp x26, x27, [sp, #-16]! +; stp x24, x25, [sp, #-16]! +; block0: +; mov x25, x0 +; mov x4, x1 +; mov x26, x4 +; 1: ldaxr w27, [x25]; cmp w27, w26; csel w28, w27, w26, lo; stlxr w24, w28, [x25]; cbnz w24, 1b +; ldp x24, x25, [sp], #16 +; ldp x26, x27, [sp], #16 +; ldr x28, [sp], #16 +; ldp fp, lr, [sp], #16 +; ret function %atomic_rmw_umin_i16(i64, i16) { block0(v0: i64, v1: i16): @@ -891,25 +755,21 @@ block0(v0: i64, v1: i16): return } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 13) -; Inst 0: stp fp, lr, [sp, #-16]! -; Inst 1: mov fp, sp -; Inst 2: str x28, [sp, #-16]! -; Inst 3: stp x26, x27, [sp, #-16]! -; Inst 4: stp x24, x25, [sp, #-16]! -; Inst 5: mov x25, x0 -; Inst 6: mov x26, x1 -; Inst 7: 1: ldaxrh w27, [x25]; cmp w27, w26; csel w28, w27, w26, lo; stlxrh w24, w28, [x25]; cbnz w24, 1b -; Inst 8: ldp x24, x25, [sp], #16 -; Inst 9: ldp x26, x27, [sp], #16 -; Inst 10: ldr x28, [sp], #16 -; Inst 11: ldp fp, lr, [sp], #16 -; Inst 12: ret -; }} +; stp fp, lr, [sp, #-16]! +; mov fp, sp +; str x28, [sp, #-16]! +; stp x26, x27, [sp, #-16]! +; stp x24, x25, [sp, #-16]! +; block0: +; mov x25, x0 +; mov x4, x1 +; mov x26, x4 +; 1: ldaxrh w27, [x25]; cmp w27, w26; csel w28, w27, w26, lo; stlxrh w24, w28, [x25]; cbnz w24, 1b +; ldp x24, x25, [sp], #16 +; ldp x26, x27, [sp], #16 +; ldr x28, [sp], #16 +; ldp fp, lr, [sp], #16 +; ret function %atomic_rmw_umin_i8(i64, i8) { block0(v0: i64, v1: i8): @@ -917,23 +777,19 @@ block0(v0: i64, v1: i8): return } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 13) -; Inst 0: stp fp, lr, [sp, #-16]! -; Inst 1: mov fp, sp -; Inst 2: str x28, [sp, #-16]! -; Inst 3: stp x26, x27, [sp, #-16]! -; Inst 4: stp x24, x25, [sp, #-16]! -; Inst 5: mov x25, x0 -; Inst 6: mov x26, x1 -; Inst 7: 1: ldaxrb w27, [x25]; cmp w27, w26; csel w28, w27, w26, lo; stlxrb w24, w28, [x25]; cbnz w24, 1b -; Inst 8: ldp x24, x25, [sp], #16 -; Inst 9: ldp x26, x27, [sp], #16 -; Inst 10: ldr x28, [sp], #16 -; Inst 11: ldp fp, lr, [sp], #16 -; Inst 12: ret -; }} +; stp fp, lr, [sp, #-16]! +; mov fp, sp +; str x28, [sp, #-16]! +; stp x26, x27, [sp, #-16]! +; stp x24, x25, [sp, #-16]! 
+; block0: +; mov x25, x0 +; mov x4, x1 +; mov x26, x4 +; 1: ldaxrb w27, [x25]; cmp w27, w26; csel w28, w27, w26, lo; stlxrb w24, w28, [x25]; cbnz w24, 1b +; ldp x24, x25, [sp], #16 +; ldp x26, x27, [sp], #16 +; ldr x28, [sp], #16 +; ldp fp, lr, [sp], #16 +; ret diff --git a/cranelift/filetests/filetests/isa/aarch64/atomic_load.clif b/cranelift/filetests/filetests/isa/aarch64/atomic_load.clif index a72474a42c..9d5ff8e132 100644 --- a/cranelift/filetests/filetests/isa/aarch64/atomic_load.clif +++ b/cranelift/filetests/filetests/isa/aarch64/atomic_load.clif @@ -7,14 +7,9 @@ block0(v0: i64): return v1 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 2) -; Inst 0: ldar x0, [x0] -; Inst 1: ret -; }} +; block0: +; ldar x0, [x0] +; ret function %atomic_load_i32(i64) -> i32 { block0(v0: i64): @@ -22,14 +17,9 @@ block0(v0: i64): return v1 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 2) -; Inst 0: ldar w0, [x0] -; Inst 1: ret -; }} +; block0: +; ldar w0, [x0] +; ret function %atomic_load_i16(i64) -> i16 { block0(v0: i64): @@ -37,14 +27,9 @@ block0(v0: i64): return v1 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 2) -; Inst 0: ldarh w0, [x0] -; Inst 1: ret -; }} +; block0: +; ldarh w0, [x0] +; ret function %atomic_load_i8(i64) -> i8 { block0(v0: i64): @@ -52,14 +37,9 @@ block0(v0: i64): return v1 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 2) -; Inst 0: ldarb w0, [x0] -; Inst 1: ret -; }} +; block0: +; ldarb w0, [x0] +; ret function %atomic_load_i32_i64(i64) -> i64 { block0(v0: i64): @@ -68,14 +48,9 @@ block0(v0: i64): return v2 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 2) -; Inst 0: ldar w0, [x0] -; Inst 1: ret -; }} +; block0: +; ldar w0, [x0] +; ret function %atomic_load_i16_i64(i64) -> i64 { block0(v0: i64): @@ -84,14 +59,9 @@ block0(v0: i64): return v2 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 2) -; Inst 0: ldarh w0, [x0] -; Inst 1: ret -; }} +; block0: +; ldarh w0, [x0] +; ret function %atomic_load_i8_i64(i64) -> i64 { block0(v0: i64): @@ -100,14 +70,9 @@ block0(v0: i64): return v2 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 2) -; Inst 0: ldarb w0, [x0] -; Inst 1: ret -; }} +; block0: +; ldarb w0, [x0] +; ret function %atomic_load_i16_i32(i64) -> i32 { block0(v0: i64): @@ -116,14 +81,9 @@ block0(v0: i64): return v2 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 2) -; Inst 0: ldarh w0, [x0] -; Inst 1: ret -; }} +; block0: +; ldarh w0, [x0] +; ret function %atomic_load_i8_i32(i64) -> i32 { block0(v0: i64): @@ -132,12 +92,7 @@ block0(v0: i64): return v2 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 
2) -; Inst 0: ldarb w0, [x0] -; Inst 1: ret -; }} +; block0: +; ldarb w0, [x0] +; ret diff --git a/cranelift/filetests/filetests/isa/aarch64/atomic_store.clif b/cranelift/filetests/filetests/isa/aarch64/atomic_store.clif index 17b982a59b..63bea58d84 100644 --- a/cranelift/filetests/filetests/isa/aarch64/atomic_store.clif +++ b/cranelift/filetests/filetests/isa/aarch64/atomic_store.clif @@ -7,14 +7,9 @@ block0(v0: i64, v1: i64): return } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 2) -; Inst 0: stlr x0, [x1] -; Inst 1: ret -; }} +; block0: +; stlr x0, [x1] +; ret function %atomic_store_i32(i32, i64) { block0(v0: i32, v1: i64): @@ -22,14 +17,9 @@ block0(v0: i32, v1: i64): return } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 2) -; Inst 0: stlr w0, [x1] -; Inst 1: ret -; }} +; block0: +; stlr w0, [x1] +; ret function %atomic_store_i16(i16, i64) { block0(v0: i16, v1: i64): @@ -37,14 +27,9 @@ block0(v0: i16, v1: i64): return } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 2) -; Inst 0: stlrh w0, [x1] -; Inst 1: ret -; }} +; block0: +; stlrh w0, [x1] +; ret function %atomic_store_i8(i8, i64) { block0(v0: i8, v1: i64): @@ -52,14 +37,9 @@ block0(v0: i8, v1: i64): return } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 2) -; Inst 0: stlrb w0, [x1] -; Inst 1: ret -; }} +; block0: +; stlrb w0, [x1] +; ret function %atomic_store_i64_i32(i64, i64) { block0(v0: i64, v1: i64): @@ -68,14 +48,9 @@ block0(v0: i64, v1: i64): return } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 2) -; Inst 0: stlr w0, [x1] -; Inst 1: ret -; }} +; block0: +; stlr w0, [x1] +; ret function %atomic_store_i64_i16(i64, i64) { block0(v0: i64, v1: i64): @@ -84,14 +59,9 @@ block0(v0: i64, v1: i64): return } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 2) -; Inst 0: stlrh w0, [x1] -; Inst 1: ret -; }} +; block0: +; stlrh w0, [x1] +; ret function %atomic_store_i64_i8(i64, i64) { block0(v0: i64, v1: i64): @@ -100,14 +70,9 @@ block0(v0: i64, v1: i64): return } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 2) -; Inst 0: stlrb w0, [x1] -; Inst 1: ret -; }} +; block0: +; stlrb w0, [x1] +; ret function %atomic_store_i32_i16(i32, i64) { block0(v0: i32, v1: i64): @@ -116,14 +81,9 @@ block0(v0: i32, v1: i64): return } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 2) -; Inst 0: stlrh w0, [x1] -; Inst 1: ret -; }} +; block0: +; stlrh w0, [x1] +; ret function %atomic_store_i32_i8(i32, i64) { block0(v0: i32, v1: i64): @@ -132,12 +92,7 @@ block0(v0: i32, v1: i64): return } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 
2) -; Inst 0: stlrb w0, [x1] -; Inst 1: ret -; }} +; block0: +; stlrb w0, [x1] +; ret diff --git a/cranelift/filetests/filetests/isa/aarch64/basic1.clif b/cranelift/filetests/filetests/isa/aarch64/basic1.clif index 4a0d0ae23b..a6caf19f9c 100644 --- a/cranelift/filetests/filetests/isa/aarch64/basic1.clif +++ b/cranelift/filetests/filetests/isa/aarch64/basic1.clif @@ -8,12 +8,7 @@ block0(v0: i32, v1: i32): return v2 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 2) -; Inst 0: add w0, w0, w1 -; Inst 1: ret -; }} +; block0: +; add w0, w0, w1 +; ret diff --git a/cranelift/filetests/filetests/isa/aarch64/bitops.clif b/cranelift/filetests/filetests/isa/aarch64/bitops.clif index 69a1a189fb..ef24f89690 100644 --- a/cranelift/filetests/filetests/isa/aarch64/bitops.clif +++ b/cranelift/filetests/filetests/isa/aarch64/bitops.clif @@ -8,15 +8,10 @@ block0(v0: i8): return v1 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 3) -; Inst 0: rbit w0, w0 -; Inst 1: lsr w0, w0, #24 -; Inst 2: ret -; }} +; block0: +; rbit w3, w0 +; lsr w0, w3, #24 +; ret function %a(i16) -> i16 { block0(v0: i16): @@ -24,15 +19,10 @@ block0(v0: i16): return v1 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 3) -; Inst 0: rbit w0, w0 -; Inst 1: lsr w0, w0, #16 -; Inst 2: ret -; }} +; block0: +; rbit w3, w0 +; lsr w0, w3, #16 +; ret function %a(i32) -> i32 { block0(v0: i32): @@ -40,14 +30,9 @@ block0(v0: i32): return v1 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 2) -; Inst 0: rbit w0, w0 -; Inst 1: ret -; }} +; block0: +; rbit w0, w0 +; ret function %a(i64) -> i64 { block0(v0: i64): @@ -55,14 +40,9 @@ block0(v0: i64): return v1 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 2) -; Inst 0: rbit x0, x0 -; Inst 1: ret -; }} +; block0: +; rbit x0, x0 +; ret function %a(i128) -> i128 { block0(v0: i128): @@ -70,16 +50,11 @@ block0(v0: i128): return v1 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 4) -; Inst 0: rbit x2, x0 -; Inst 1: rbit x0, x1 -; Inst 2: mov x1, x2 -; Inst 3: ret -; }} +; block0: +; rbit x6, x0 +; rbit x0, x1 +; mov x1, x6 +; ret function %b(i8) -> i8 { block0(v0: i8): @@ -87,16 +62,11 @@ block0(v0: i8): return v1 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 4) -; Inst 0: uxtb w0, w0 -; Inst 1: clz w0, w0 -; Inst 2: sub w0, w0, #24 -; Inst 3: ret -; }} +; block0: +; uxtb w3, w0 +; clz w5, w3 +; sub w0, w5, #24 +; ret function %b(i16) -> i16 { block0(v0: i16): @@ -104,16 +74,11 @@ block0(v0: i16): return v1 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 4) -; Inst 0: uxth w0, w0 -; Inst 1: clz w0, w0 -; Inst 2: sub w0, w0, #16 -; Inst 3: ret -; }} +; block0: +; uxth w3, w0 +; clz w5, w3 +; sub w0, w5, #16 +; ret function %b(i32) -> i32 { block0(v0: i32): @@ -121,14 +86,9 @@ block0(v0: i32): return v1 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 
2) -; Inst 0: clz w0, w0 -; Inst 1: ret -; }} +; block0: +; clz w0, w0 +; ret function %b(i64) -> i64 { block0(v0: i64): @@ -136,14 +96,9 @@ block0(v0: i64): return v1 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 2) -; Inst 0: clz x0, x0 -; Inst 1: ret -; }} +; block0: +; clz x0, x0 +; ret function %b(i128) -> i128 { block0(v0: i128): @@ -151,18 +106,13 @@ block0(v0: i128): return v1 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 6) -; Inst 0: clz x1, x1 -; Inst 1: clz x0, x0 -; Inst 2: lsr x2, x1, #6 -; Inst 3: madd x0, x0, x2, x1 -; Inst 4: movz x1, #0 -; Inst 5: ret -; }} +; block0: +; clz x6, x1 +; clz x8, x0 +; lsr x10, x6, #6 +; madd x0, x8, x10, x6 +; movz x1, #0 +; ret function %c(i8) -> i8 { block0(v0: i8): @@ -170,16 +120,11 @@ block0(v0: i8): return v1 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 4) -; Inst 0: uxtb w0, w0 -; Inst 1: cls w0, w0 -; Inst 2: sub w0, w0, #24 -; Inst 3: ret -; }} +; block0: +; uxtb w3, w0 +; cls w5, w3 +; sub w0, w5, #24 +; ret function %c(i16) -> i16 { block0(v0: i16): @@ -187,16 +132,11 @@ block0(v0: i16): return v1 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 4) -; Inst 0: uxth w0, w0 -; Inst 1: cls w0, w0 -; Inst 2: sub w0, w0, #16 -; Inst 3: ret -; }} +; block0: +; uxth w3, w0 +; cls w5, w3 +; sub w0, w5, #16 +; ret function %c(i32) -> i32 { block0(v0: i32): @@ -204,14 +144,9 @@ block0(v0: i32): return v1 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 2) -; Inst 0: cls w0, w0 -; Inst 1: ret -; }} +; block0: +; cls w0, w0 +; ret function %c(i64) -> i64 { block0(v0: i64): @@ -219,14 +154,9 @@ block0(v0: i64): return v1 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 2) -; Inst 0: cls x0, x0 -; Inst 1: ret -; }} +; block0: +; cls x0, x0 +; ret function %c(i128) -> i128 { block0(v0: i128): @@ -234,22 +164,17 @@ block0(v0: i128): return v1 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 10) -; Inst 0: cls x2, x0 -; Inst 1: cls x3, x1 -; Inst 2: eon x0, x1, x0 -; Inst 3: lsr x0, x0, #63 -; Inst 4: madd x0, x2, x0, x0 -; Inst 5: subs xzr, x3, #63 -; Inst 6: csel x0, x0, xzr, eq -; Inst 7: add x0, x0, x3 -; Inst 8: movz x1, #0 -; Inst 9: ret -; }} +; block0: +; cls x6, x0 +; cls x8, x1 +; eon x10, x1, x0 +; lsr x12, x10, #63 +; madd x14, x6, x12, x12 +; subs xzr, x8, #63 +; csel x1, x14, xzr, eq +; add x0, x1, x8 +; movz x1, #0 +; ret function %d(i8) -> i8 { block0(v0: i8): @@ -257,16 +182,11 @@ block0(v0: i8): return v1 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 4) -; Inst 0: rbit w0, w0 -; Inst 1: orr w0, w0, #8388608 -; Inst 2: clz w0, w0 -; Inst 3: ret -; }} +; block0: +; rbit w3, w0 +; orr w5, w3, #8388608 +; clz w0, w5 +; ret function %d(i16) -> i16 { block0(v0: i16): @@ -274,16 +194,11 @@ block0(v0: i16): return v1 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 
4) -; Inst 0: rbit w0, w0 -; Inst 1: orr w0, w0, #32768 -; Inst 2: clz w0, w0 -; Inst 3: ret -; }} +; block0: +; rbit w3, w0 +; orr w5, w3, #32768 +; clz w0, w5 +; ret function %d(i32) -> i32 { block0(v0: i32): @@ -291,15 +206,10 @@ block0(v0: i32): return v1 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 3) -; Inst 0: rbit w0, w0 -; Inst 1: clz w0, w0 -; Inst 2: ret -; }} +; block0: +; rbit w3, w0 +; clz w0, w3 +; ret function %d(i64) -> i64 { block0(v0: i64): @@ -307,15 +217,10 @@ block0(v0: i64): return v1 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 3) -; Inst 0: rbit x0, x0 -; Inst 1: clz x0, x0 -; Inst 2: ret -; }} +; block0: +; rbit x3, x0 +; clz x0, x3 +; ret function %d(i128) -> i128 { block0(v0: i128): @@ -323,20 +228,15 @@ block0(v0: i128): return v1 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 8) -; Inst 0: rbit x0, x0 -; Inst 1: rbit x1, x1 -; Inst 2: clz x0, x0 -; Inst 3: clz x1, x1 -; Inst 4: lsr x2, x0, #6 -; Inst 5: madd x0, x1, x2, x0 -; Inst 6: movz x1, #0 -; Inst 7: ret -; }} +; block0: +; rbit x6, x0 +; rbit x8, x1 +; clz x10, x6 +; clz x12, x8 +; lsr x14, x10, #6 +; madd x0, x12, x14, x10 +; movz x1, #0 +; ret function %d(i128) -> i128 { block0(v0: i128): @@ -344,19 +244,19 @@ block0(v0: i128): return v1 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 7) -; Inst 0: fmov d0, x0 -; Inst 1: mov v0.d[1], x1 -; Inst 2: cnt v0.16b, v0.16b -; Inst 3: addv b0, v0.16b -; Inst 4: umov w0, v0.b[0] -; Inst 5: movz x1, #0 -; Inst 6: ret -; }} +; stp fp, lr, [sp, #-16]! +; mov fp, sp +; stp d11, d13, [sp, #-16]! +; block0: +; fmov d6, x0 +; mov v6.d[1], x1 +; cnt v11.16b, v6.16b +; addv b13, v11.16b +; umov w0, v13.b[0] +; movz x1, #0 +; ldp d11, d13, [sp], #16 +; ldp fp, lr, [sp], #16 +; ret function %d(i64) -> i64 { block0(v0: i64): @@ -364,17 +264,12 @@ block0(v0: i64): return v1 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 5) -; Inst 0: fmov d0, x0 -; Inst 1: cnt v0.8b, v0.8b -; Inst 2: addv b0, v0.8b -; Inst 3: umov w0, v0.b[0] -; Inst 4: ret -; }} +; block0: +; fmov d3, x0 +; cnt v5.8b, v3.8b +; addv b7, v5.8b +; umov w0, v7.b[0] +; ret function %d(i32) -> i32 { block0(v0: i32): @@ -382,17 +277,12 @@ block0(v0: i32): return v1 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 5) -; Inst 0: fmov s0, w0 -; Inst 1: cnt v0.8b, v0.8b -; Inst 2: addv b0, v0.8b -; Inst 3: umov w0, v0.b[0] -; Inst 4: ret -; }} +; block0: +; fmov s3, w0 +; cnt v5.8b, v3.8b +; addv b7, v5.8b +; umov w0, v7.b[0] +; ret function %d(i16) -> i16 { block0(v0: i16): @@ -400,17 +290,12 @@ block0(v0: i16): return v1 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 5) -; Inst 0: fmov s0, w0 -; Inst 1: cnt v0.8b, v0.8b -; Inst 2: addp v0.8b, v0.8b, v0.8b -; Inst 3: umov w0, v0.b[0] -; Inst 4: ret -; }} +; block0: +; fmov s3, w0 +; cnt v5.8b, v3.8b +; addp v7.8b, v5.8b, v5.8b +; umov w0, v7.b[0] +; ret function %d(i8) -> i8 { block0(v0: i8): @@ -418,16 +303,11 @@ block0(v0: i8): return v1 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 
4) -; Inst 0: fmov s0, w0 -; Inst 1: cnt v0.8b, v0.8b -; Inst 2: umov w0, v0.b[0] -; Inst 3: ret -; }} +; block0: +; fmov s3, w0 +; cnt v5.8b, v3.8b +; umov w0, v5.b[0] +; ret function %bextend_b8() -> b32 { block0: @@ -436,15 +316,10 @@ block0: return v2 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 3) -; Inst 0: movz x0, #255 -; Inst 1: sxtb w0, w0 -; Inst 2: ret -; }} +; block0: +; movz x2, #255 +; sxtb w0, w2 +; ret function %bextend_b1() -> b32 { block0: @@ -453,15 +328,10 @@ block0: return v2 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 3) -; Inst 0: movz x0, #1 -; Inst 1: sbfx w0, w0, #0, #1 -; Inst 2: ret -; }} +; block0: +; movz x2, #1 +; sbfx w0, w2, #0, #1 +; ret function %bnot_i32(i32) -> i32 { block0(v0: i32): @@ -469,14 +339,9 @@ block0(v0: i32): return v1 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 2) -; Inst 0: orn w0, wzr, w0 -; Inst 1: ret -; }} +; block0: +; orn w0, wzr, w0 +; ret function %bnot_i64(i64) -> i64 { block0(v0: i64): @@ -484,14 +349,9 @@ block0(v0: i64): return v1 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 2) -; Inst 0: orn x0, xzr, x0 -; Inst 1: ret -; }} +; block0: +; orn x0, xzr, x0 +; ret function %bnot_i64_with_shift(i64) -> i64 { block0(v0: i64): @@ -501,14 +361,9 @@ block0(v0: i64): return v3 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 2) -; Inst 0: orn x0, xzr, x0, LSL 3 -; Inst 1: ret -; }} +; block0: +; orn x0, xzr, x0, LSL 3 +; ret function %bnot_i128(i128) -> i128 { block0(v0: i128): @@ -516,15 +371,10 @@ block0(v0: i128): return v1 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 3) -; Inst 0: orn x0, xzr, x0 -; Inst 1: orn x1, xzr, x1 -; Inst 2: ret -; }} +; block0: +; orn x0, xzr, x0 +; orn x1, xzr, x1 +; ret function %bnot_i8x16(i8x16) -> i8x16 { block0(v0: i8x16): @@ -532,14 +382,9 @@ block0(v0: i8x16): return v1 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 2) -; Inst 0: mvn v0.16b, v0.16b -; Inst 1: ret -; }} +; block0: +; mvn v0.16b, v0.16b +; ret function %band_i32(i32, i32) -> i32 { block0(v0: i32, v1: i32): @@ -547,14 +392,9 @@ block0(v0: i32, v1: i32): return v2 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 2) -; Inst 0: and w0, w0, w1 -; Inst 1: ret -; }} +; block0: +; and w0, w0, w1 +; ret function %band_i64(i64, i64) -> i64 { block0(v0: i64, v1: i64): @@ -562,14 +402,9 @@ block0(v0: i64, v1: i64): return v2 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 2) -; Inst 0: and x0, x0, x1 -; Inst 1: ret -; }} +; block0: +; and x0, x0, x1 +; ret function %band_i128(i128, i128) -> i128 { block0(v0: i128, v1: i128): @@ -577,15 +412,10 @@ block0(v0: i128, v1: i128): return v2 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 
3) -; Inst 0: and x0, x0, x2 -; Inst 1: and x1, x1, x3 -; Inst 2: ret -; }} +; block0: +; and x0, x0, x2 +; and x1, x1, x3 +; ret function %band_i8x16(i8x16, i8x16) -> i8x16 { block0(v0: i8x16, v1: i8x16): @@ -593,14 +423,9 @@ block0(v0: i8x16, v1: i8x16): return v2 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 2) -; Inst 0: and v0.16b, v0.16b, v1.16b -; Inst 1: ret -; }} +; block0: +; and v0.16b, v0.16b, v1.16b +; ret function %band_i64_constant(i64) -> i64 { block0(v0: i64): @@ -609,14 +434,9 @@ block0(v0: i64): return v2 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 2) -; Inst 0: and x0, x0, #3 -; Inst 1: ret -; }} +; block0: +; and x0, x0, #3 +; ret function %band_i64_constant2(i64) -> i64 { block0(v0: i64): @@ -625,14 +445,9 @@ block0(v0: i64): return v2 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 2) -; Inst 0: and x0, x0, #3 -; Inst 1: ret -; }} +; block0: +; and x0, x0, #3 +; ret function %band_i64_constant_shift(i64, i64) -> i64 { block0(v0: i64, v1: i64): @@ -642,14 +457,9 @@ block0(v0: i64, v1: i64): return v4 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 2) -; Inst 0: and x0, x0, x1, LSL 3 -; Inst 1: ret -; }} +; block0: +; and x0, x0, x1, LSL 3 +; ret function %band_i64_constant_shift2(i64, i64) -> i64 { block0(v0: i64, v1: i64): @@ -659,14 +469,9 @@ block0(v0: i64, v1: i64): return v4 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 2) -; Inst 0: and x0, x0, x1, LSL 3 -; Inst 1: ret -; }} +; block0: +; and x0, x0, x1, LSL 3 +; ret function %bor_i32(i32, i32) -> i32 { block0(v0: i32, v1: i32): @@ -674,14 +479,9 @@ block0(v0: i32, v1: i32): return v2 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 2) -; Inst 0: orr w0, w0, w1 -; Inst 1: ret -; }} +; block0: +; orr w0, w0, w1 +; ret function %bor_i64(i64, i64) -> i64 { block0(v0: i64, v1: i64): @@ -689,14 +489,9 @@ block0(v0: i64, v1: i64): return v2 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 2) -; Inst 0: orr x0, x0, x1 -; Inst 1: ret -; }} +; block0: +; orr x0, x0, x1 +; ret function %bor_i128(i128, i128) -> i128 { block0(v0: i128, v1: i128): @@ -704,15 +499,10 @@ block0(v0: i128, v1: i128): return v2 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 3) -; Inst 0: orr x0, x0, x2 -; Inst 1: orr x1, x1, x3 -; Inst 2: ret -; }} +; block0: +; orr x0, x0, x2 +; orr x1, x1, x3 +; ret function %bor_i8x16(i8x16, i8x16) -> i8x16 { block0(v0: i8x16, v1: i8x16): @@ -720,14 +510,9 @@ block0(v0: i8x16, v1: i8x16): return v2 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 2) -; Inst 0: orr v0.16b, v0.16b, v1.16b -; Inst 1: ret -; }} +; block0: +; orr v0.16b, v0.16b, v1.16b +; ret function %bor_i64_constant(i64) -> i64 { block0(v0: i64): @@ -736,14 +521,9 @@ block0(v0: i64): return v2 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 
2) -; Inst 0: orr x0, x0, #3 -; Inst 1: ret -; }} +; block0: +; orr x0, x0, #3 +; ret function %bor_i64_constant2(i64) -> i64 { block0(v0: i64): @@ -752,14 +532,9 @@ block0(v0: i64): return v2 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 2) -; Inst 0: orr x0, x0, #3 -; Inst 1: ret -; }} +; block0: +; orr x0, x0, #3 +; ret function %bor_i64_constant_shift(i64, i64) -> i64 { block0(v0: i64, v1: i64): @@ -769,14 +544,9 @@ block0(v0: i64, v1: i64): return v4 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 2) -; Inst 0: orr x0, x0, x1, LSL 3 -; Inst 1: ret -; }} +; block0: +; orr x0, x0, x1, LSL 3 +; ret function %bor_i64_constant_shift2(i64, i64) -> i64 { block0(v0: i64, v1: i64): @@ -786,14 +556,9 @@ block0(v0: i64, v1: i64): return v4 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 2) -; Inst 0: orr x0, x0, x1, LSL 3 -; Inst 1: ret -; }} +; block0: +; orr x0, x0, x1, LSL 3 +; ret function %bxor_i32(i32, i32) -> i32 { block0(v0: i32, v1: i32): @@ -801,14 +566,9 @@ block0(v0: i32, v1: i32): return v2 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 2) -; Inst 0: eor w0, w0, w1 -; Inst 1: ret -; }} +; block0: +; eor w0, w0, w1 +; ret function %bxor_i64(i64, i64) -> i64 { block0(v0: i64, v1: i64): @@ -816,14 +576,9 @@ block0(v0: i64, v1: i64): return v2 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 2) -; Inst 0: eor x0, x0, x1 -; Inst 1: ret -; }} +; block0: +; eor x0, x0, x1 +; ret function %bxor_i128(i128, i128) -> i128 { block0(v0: i128, v1: i128): @@ -831,15 +586,10 @@ block0(v0: i128, v1: i128): return v2 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 3) -; Inst 0: eor x0, x0, x2 -; Inst 1: eor x1, x1, x3 -; Inst 2: ret -; }} +; block0: +; eor x0, x0, x2 +; eor x1, x1, x3 +; ret function %bxor_i8x16(i8x16, i8x16) -> i8x16 { block0(v0: i8x16, v1: i8x16): @@ -847,14 +597,9 @@ block0(v0: i8x16, v1: i8x16): return v2 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 2) -; Inst 0: eor v0.16b, v0.16b, v1.16b -; Inst 1: ret -; }} +; block0: +; eor v0.16b, v0.16b, v1.16b +; ret function %bxor_i64_constant(i64) -> i64 { block0(v0: i64): @@ -863,14 +608,9 @@ block0(v0: i64): return v2 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 2) -; Inst 0: eor x0, x0, #3 -; Inst 1: ret -; }} +; block0: +; eor x0, x0, #3 +; ret function %bxor_i64_constant2(i64) -> i64 { block0(v0: i64): @@ -879,14 +619,9 @@ block0(v0: i64): return v2 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 2) -; Inst 0: eor x0, x0, #3 -; Inst 1: ret -; }} +; block0: +; eor x0, x0, #3 +; ret function %bxor_i64_constant_shift(i64, i64) -> i64 { block0(v0: i64, v1: i64): @@ -896,14 +631,9 @@ block0(v0: i64, v1: i64): return v4 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 
2) -; Inst 0: eor x0, x0, x1, LSL 3 -; Inst 1: ret -; }} +; block0: +; eor x0, x0, x1, LSL 3 +; ret function %bxor_i64_constant_shift2(i64, i64) -> i64 { block0(v0: i64, v1: i64): @@ -913,14 +643,9 @@ block0(v0: i64, v1: i64): return v4 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 2) -; Inst 0: eor x0, x0, x1, LSL 3 -; Inst 1: ret -; }} +; block0: +; eor x0, x0, x1, LSL 3 +; ret function %band_not_i32(i32, i32) -> i32 { block0(v0: i32, v1: i32): @@ -928,14 +653,9 @@ block0(v0: i32, v1: i32): return v2 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 2) -; Inst 0: bic w0, w0, w1 -; Inst 1: ret -; }} +; block0: +; bic w0, w0, w1 +; ret function %band_not_i64(i64, i64) -> i64 { block0(v0: i64, v1: i64): @@ -943,14 +663,9 @@ block0(v0: i64, v1: i64): return v2 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 2) -; Inst 0: bic x0, x0, x1 -; Inst 1: ret -; }} +; block0: +; bic x0, x0, x1 +; ret function %band_not_i128(i128, i128) -> i128 { block0(v0: i128, v1: i128): @@ -958,15 +673,10 @@ block0(v0: i128, v1: i128): return v2 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 3) -; Inst 0: bic x0, x0, x2 -; Inst 1: bic x1, x1, x3 -; Inst 2: ret -; }} +; block0: +; bic x0, x0, x2 +; bic x1, x1, x3 +; ret function %band_not_i8x16(i8x16, i8x16) -> i8x16 { block0(v0: i8x16, v1: i8x16): @@ -974,14 +684,9 @@ block0(v0: i8x16, v1: i8x16): return v2 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 2) -; Inst 0: bic v0.16b, v0.16b, v1.16b -; Inst 1: ret -; }} +; block0: +; bic v0.16b, v0.16b, v1.16b +; ret function %band_not_i64_constant(i64) -> i64 { block0(v0: i64): @@ -990,14 +695,9 @@ block0(v0: i64): return v2 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 2) -; Inst 0: bic x0, x0, #4 -; Inst 1: ret -; }} +; block0: +; bic x0, x0, #4 +; ret function %band_not_i64_constant_shift(i64, i64) -> i64 { block0(v0: i64, v1: i64): @@ -1007,14 +707,9 @@ block0(v0: i64, v1: i64): return v4 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 2) -; Inst 0: bic x0, x0, x1, LSL 4 -; Inst 1: ret -; }} +; block0: +; bic x0, x0, x1, LSL 4 +; ret function %bor_not_i32(i32, i32) -> i32 { block0(v0: i32, v1: i32): @@ -1022,14 +717,9 @@ block0(v0: i32, v1: i32): return v2 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 2) -; Inst 0: orn w0, w0, w1 -; Inst 1: ret -; }} +; block0: +; orn w0, w0, w1 +; ret function %bor_not_i64(i64, i64) -> i64 { block0(v0: i64, v1: i64): @@ -1037,14 +727,9 @@ block0(v0: i64, v1: i64): return v2 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 2) -; Inst 0: orn x0, x0, x1 -; Inst 1: ret -; }} +; block0: +; orn x0, x0, x1 +; ret function %bor_not_i128(i128, i128) -> i128 { block0(v0: i128, v1: i128): @@ -1052,15 +737,10 @@ block0(v0: i128, v1: i128): return v2 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 
3) -; Inst 0: orn x0, x0, x2 -; Inst 1: orn x1, x1, x3 -; Inst 2: ret -; }} +; block0: +; orn x0, x0, x2 +; orn x1, x1, x3 +; ret function %bor_not_i64_constant(i64) -> i64 { block0(v0: i64): @@ -1069,14 +749,9 @@ block0(v0: i64): return v2 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 2) -; Inst 0: orn x0, x0, #4 -; Inst 1: ret -; }} +; block0: +; orn x0, x0, #4 +; ret function %bor_not_i64_constant_shift(i64, i64) -> i64 { block0(v0: i64, v1: i64): @@ -1086,14 +761,9 @@ block0(v0: i64, v1: i64): return v4 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 2) -; Inst 0: orn x0, x0, x1, LSL 4 -; Inst 1: ret -; }} +; block0: +; orn x0, x0, x1, LSL 4 +; ret function %bxor_not_i32(i32, i32) -> i32 { block0(v0: i32, v1: i32): @@ -1101,14 +771,9 @@ block0(v0: i32, v1: i32): return v2 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 2) -; Inst 0: eon w0, w0, w1 -; Inst 1: ret -; }} +; block0: +; eon w0, w0, w1 +; ret function %bxor_not_i64(i64, i64) -> i64 { block0(v0: i64, v1: i64): @@ -1116,14 +781,9 @@ block0(v0: i64, v1: i64): return v2 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 2) -; Inst 0: eon x0, x0, x1 -; Inst 1: ret -; }} +; block0: +; eon x0, x0, x1 +; ret function %bxor_not_i128(i128, i128) -> i128 { block0(v0: i128, v1: i128): @@ -1131,15 +791,10 @@ block0(v0: i128, v1: i128): return v2 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 3) -; Inst 0: eon x0, x0, x2 -; Inst 1: eon x1, x1, x3 -; Inst 2: ret -; }} +; block0: +; eon x0, x0, x2 +; eon x1, x1, x3 +; ret function %bxor_not_i64_constant(i64) -> i64 { block0(v0: i64): @@ -1148,14 +803,9 @@ block0(v0: i64): return v2 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 2) -; Inst 0: eon x0, x0, #4 -; Inst 1: ret -; }} +; block0: +; eon x0, x0, #4 +; ret function %bxor_not_i64_constant_shift(i64, i64) -> i64 { block0(v0: i64, v1: i64): @@ -1165,14 +815,9 @@ block0(v0: i64, v1: i64): return v4 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 2) -; Inst 0: eon x0, x0, x1, LSL 4 -; Inst 1: ret -; }} +; block0: +; eon x0, x0, x1, LSL 4 +; ret function %ishl_i128_i8(i128, i8) -> i128 { block0(v0: i128, v1: i8): @@ -1180,25 +825,17 @@ block0(v0: i128, v1: i8): return v2 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 13) -; Inst 0: lsl x4, x0, x2 -; Inst 1: lsl x3, x1, x2 -; Inst 2: orn w1, wzr, w2 -; Inst 3: lsr x0, x0, #1 -; Inst 4: lsr x0, x0, x1 -; Inst 5: orr x0, x3, x0 -; Inst 6: ands xzr, x2, #64 -; Inst 7: csel x1, xzr, x4, ne -; Inst 8: csel x0, x4, x0, ne -; Inst 9: mov x2, x0 -; Inst 10: mov x0, x1 -; Inst 11: mov x1, x2 -; Inst 12: ret -; }} +; block0: +; lsl x8, x0, x2 +; lsl x10, x1, x2 +; orn w12, wzr, w2 +; lsr x14, x0, #1 +; lsr x0, x14, x12 +; orr x3, x10, x0 +; ands xzr, x2, #64 +; csel x0, xzr, x8, ne +; csel x1, x8, x3, ne +; ret function %ishl_i128_i128(i128, i128) -> i128 { block0(v0: i128, v1: i128): @@ -1206,25 +843,17 @@ block0(v0: i128, v1: i128): return v2 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 
13) -; Inst 0: lsl x3, x0, x2 -; Inst 1: lsl x1, x1, x2 -; Inst 2: orn w4, wzr, w2 -; Inst 3: lsr x0, x0, #1 -; Inst 4: lsr x0, x0, x4 -; Inst 5: orr x0, x1, x0 -; Inst 6: ands xzr, x2, #64 -; Inst 7: csel x1, xzr, x3, ne -; Inst 8: csel x0, x3, x0, ne -; Inst 9: mov x2, x0 -; Inst 10: mov x0, x1 -; Inst 11: mov x1, x2 -; Inst 12: ret -; }} +; block0: +; lsl x10, x0, x2 +; lsl x12, x1, x2 +; orn w14, wzr, w2 +; lsr x0, x0, #1 +; lsr x3, x0, x14 +; orr x4, x12, x3 +; ands xzr, x2, #64 +; csel x0, xzr, x10, ne +; csel x1, x10, x4, ne +; ret function %ushr_i128_i8(i128, i8) -> i128 { block0(v0: i128, v1: i8): @@ -1232,25 +861,17 @@ block0(v0: i128, v1: i8): return v2 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 13) -; Inst 0: lsr x3, x0, x2 -; Inst 1: lsr x0, x1, x2 -; Inst 2: orn w4, wzr, w2 -; Inst 3: lsl x1, x1, #1 -; Inst 4: lsl x1, x1, x4 -; Inst 5: orr x1, x3, x1 -; Inst 6: ands xzr, x2, #64 -; Inst 7: csel x1, x0, x1, ne -; Inst 8: csel x0, xzr, x0, ne -; Inst 9: mov x2, x0 -; Inst 10: mov x0, x1 -; Inst 11: mov x1, x2 -; Inst 12: ret -; }} +; block0: +; lsr x8, x0, x2 +; lsr x10, x1, x2 +; orn w12, wzr, w2 +; lsl x14, x1, #1 +; lsl x0, x14, x12 +; orr x3, x8, x0 +; ands xzr, x2, #64 +; csel x0, x10, x3, ne +; csel x1, xzr, x10, ne +; ret function %ushr_i128_i128(i128, i128) -> i128 { block0(v0: i128, v1: i128): @@ -1258,25 +879,17 @@ block0(v0: i128, v1: i128): return v2 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 13) -; Inst 0: lsr x3, x0, x2 -; Inst 1: lsr x0, x1, x2 -; Inst 2: orn w4, wzr, w2 -; Inst 3: lsl x1, x1, #1 -; Inst 4: lsl x1, x1, x4 -; Inst 5: orr x1, x3, x1 -; Inst 6: ands xzr, x2, #64 -; Inst 7: csel x1, x0, x1, ne -; Inst 8: csel x0, xzr, x0, ne -; Inst 9: mov x2, x0 -; Inst 10: mov x0, x1 -; Inst 11: mov x1, x2 -; Inst 12: ret -; }} +; block0: +; lsr x10, x0, x2 +; lsr x12, x1, x2 +; orn w14, wzr, w2 +; lsl x0, x1, #1 +; lsl x3, x0, x14 +; orr x4, x10, x3 +; ands xzr, x2, #64 +; csel x0, x12, x4, ne +; csel x1, xzr, x12, ne +; ret function %sshr_i128_i8(i128, i8) -> i128 { block0(v0: i128, v1: i8): @@ -1284,25 +897,18 @@ block0(v0: i128, v1: i8): return v2 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 13) -; Inst 0: lsr x3, x0, x2 -; Inst 1: asr x0, x1, x2 -; Inst 2: orn w4, wzr, w2 -; Inst 3: lsl x5, x1, #1 -; Inst 4: lsl x4, x5, x4 -; Inst 5: asr x1, x1, #63 -; Inst 6: orr x3, x3, x4 -; Inst 7: ands xzr, x2, #64 -; Inst 8: csel x2, x0, x3, ne -; Inst 9: csel x0, x1, x0, ne -; Inst 10: mov x1, x0 -; Inst 11: mov x0, x2 -; Inst 12: ret -; }} +; block0: +; lsr x8, x0, x2 +; asr x10, x1, x2 +; orn w12, wzr, w2 +; lsl x14, x1, #1 +; lsl x0, x14, x12 +; asr x3, x1, #63 +; orr x4, x8, x0 +; ands xzr, x2, #64 +; csel x0, x10, x4, ne +; csel x1, x3, x10, ne +; ret function %sshr_i128_i128(i128, i128) -> i128 { block0(v0: i128, v1: i128): @@ -1310,23 +916,16 @@ block0(v0: i128, v1: i128): return v2 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 
13) -; Inst 0: lsr x3, x0, x2 -; Inst 1: asr x0, x1, x2 -; Inst 2: orn w4, wzr, w2 -; Inst 3: lsl x5, x1, #1 -; Inst 4: lsl x4, x5, x4 -; Inst 5: asr x1, x1, #63 -; Inst 6: orr x3, x3, x4 -; Inst 7: ands xzr, x2, #64 -; Inst 8: csel x2, x0, x3, ne -; Inst 9: csel x0, x1, x0, ne -; Inst 10: mov x1, x0 -; Inst 11: mov x0, x2 -; Inst 12: ret -; }} +; block0: +; lsr x10, x0, x2 +; asr x12, x1, x2 +; orn w14, wzr, w2 +; lsl x0, x1, #1 +; lsl x3, x0, x14 +; asr x4, x1, #63 +; orr x6, x10, x3 +; ands xzr, x2, #64 +; csel x0, x12, x6, ne +; csel x1, x4, x12, ne +; ret diff --git a/cranelift/filetests/filetests/isa/aarch64/call-indirect.clif b/cranelift/filetests/filetests/isa/aarch64/call-indirect.clif index 1d71b9038d..ff0dcd2da5 100644 --- a/cranelift/filetests/filetests/isa/aarch64/call-indirect.clif +++ b/cranelift/filetests/filetests/isa/aarch64/call-indirect.clif @@ -9,15 +9,10 @@ block0(v0: i64, v1: i64): return v2 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 5) -; Inst 0: stp fp, lr, [sp, #-16]! -; Inst 1: mov fp, sp -; Inst 2: blr x1 -; Inst 3: ldp fp, lr, [sp], #16 -; Inst 4: ret -; }} +; stp fp, lr, [sp, #-16]! +; mov fp, sp +; block0: +; blr x1 +; ldp fp, lr, [sp], #16 +; ret diff --git a/cranelift/filetests/filetests/isa/aarch64/call.clif b/cranelift/filetests/filetests/isa/aarch64/call.clif index 21ba70c987..41ed9a3e9d 100644 --- a/cranelift/filetests/filetests/isa/aarch64/call.clif +++ b/cranelift/filetests/filetests/isa/aarch64/call.clif @@ -1,4 +1,4 @@ -test compile +test compile precise-output set unwind_info=false set enable_probestack=false target aarch64 @@ -11,12 +11,13 @@ block0(v0: i64): return v1 } -; check: stp fp, lr, [sp, #-16]! -; nextln: mov fp, sp -; nextln: ldr x1, 8 ; b 12 ; data -; nextln: blr x1 -; nextln: ldp fp, lr, [sp], #16 -; nextln: ret +; stp fp, lr, [sp, #-16]! +; mov fp, sp +; block0: +; ldr x5, 8 ; b 12 ; data TestCase { length: 1, ascii: [103, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0] } + 0 +; blr x5 +; ldp fp, lr, [sp], #16 +; ret function %f2(i32) -> i64 { fn0 = %g(i32 uext) -> i64 baldrdash_system_v @@ -26,20 +27,40 @@ block0(v0: i32): return v1 } -; check: stp fp, lr, [sp, #-16]! -; nextln: mov fp, sp -; check: mov w0, w0 -; nextln: ldr x1, 8 ; b 12 ; data -; nextln: blr x1 -; check: ldp fp, lr, [sp], #16 -; nextln: ret +; stp fp, lr, [sp, #-16]! +; mov fp, sp +; stp x27, x28, [sp, #-16]! +; stp x25, x26, [sp, #-16]! +; stp x23, x24, [sp, #-16]! +; stp x21, x22, [sp, #-16]! +; stp x19, x20, [sp, #-16]! +; stp d14, d15, [sp, #-16]! +; stp d12, d13, [sp, #-16]! +; stp d10, d11, [sp, #-16]! +; stp d8, d9, [sp, #-16]! +; block0: +; mov w0, w0 +; ldr x5, 8 ; b 12 ; data TestCase { length: 1, ascii: [103, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0] } + 0 +; blr x5 +; ldp d8, d9, [sp], #16 +; ldp d10, d11, [sp], #16 +; ldp d12, d13, [sp], #16 +; ldp d14, d15, [sp], #16 +; ldp x19, x20, [sp], #16 +; ldp x21, x22, [sp], #16 +; ldp x23, x24, [sp], #16 +; ldp x25, x26, [sp], #16 +; ldp x27, x28, [sp], #16 +; ldp fp, lr, [sp], #16 +; ret function %f3(i32) -> i32 uext baldrdash_system_v { block0(v0: i32): return v0 } -; check: mov w0, w0 +; block0: +; mov w0, w0 function %f4(i32) -> i64 { fn0 = %g(i32 sext) -> i64 baldrdash_system_v @@ -49,20 +70,40 @@ block0(v0: i32): return v1 } -; check: stp fp, lr, [sp, #-16]! -; nextln: mov fp, sp -; check: sxtw x0, w0 -; nextln: ldr x1, 8 ; b 12 ; data -; nextln: blr x1 -; check: ldp fp, lr, [sp], #16 -; nextln: ret +; stp fp, lr, [sp, #-16]! 
+; mov fp, sp +; stp x27, x28, [sp, #-16]! +; stp x25, x26, [sp, #-16]! +; stp x23, x24, [sp, #-16]! +; stp x21, x22, [sp, #-16]! +; stp x19, x20, [sp, #-16]! +; stp d14, d15, [sp, #-16]! +; stp d12, d13, [sp, #-16]! +; stp d10, d11, [sp, #-16]! +; stp d8, d9, [sp, #-16]! +; block0: +; sxtw x0, w0 +; ldr x5, 8 ; b 12 ; data TestCase { length: 1, ascii: [103, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0] } + 0 +; blr x5 +; ldp d8, d9, [sp], #16 +; ldp d10, d11, [sp], #16 +; ldp d12, d13, [sp], #16 +; ldp d14, d15, [sp], #16 +; ldp x19, x20, [sp], #16 +; ldp x21, x22, [sp], #16 +; ldp x23, x24, [sp], #16 +; ldp x25, x26, [sp], #16 +; ldp x27, x28, [sp], #16 +; ldp fp, lr, [sp], #16 +; ret function %f5(i32) -> i32 sext baldrdash_system_v { block0(v0: i32): return v0 } -; check: sxtw x0, w0 +; block0: +; sxtw x0, w0 function %f6(i8) -> i64 { fn0 = %g(i32, i32, i32, i32, i32, i32, i32, i32, i8 sext) -> i64 @@ -73,26 +114,27 @@ block0(v0: i8): return v2 } -; check: stp fp, lr, [sp, #-16]! -; nextln: mov fp, sp -; nextln: mov x8, x0 -; nextln: sub sp, sp, #16 -; nextln: virtual_sp_offset_adjust 16 -; nextln: movz x0, #42 -; nextln: movz x1, #42 -; nextln: movz x2, #42 -; nextln: movz x3, #42 -; nextln: movz x4, #42 -; nextln: movz x5, #42 -; nextln: movz x6, #42 -; nextln: movz x7, #42 -; nextln: sturb w8, [sp] -; nextln: ldr x8, 8 ; b 12 ; data -; nextln: blr x8 -; nextln: add sp, sp, #16 -; nextln: virtual_sp_offset_adjust -16 -; nextln: ldp fp, lr, [sp], #16 -; nextln: ret +; stp fp, lr, [sp, #-16]! +; mov fp, sp +; block0: +; mov x15, x0 +; sub sp, sp, #16 +; virtual_sp_offset_adjust 16 +; movz x0, #42 +; movz x1, #42 +; movz x2, #42 +; movz x3, #42 +; movz x4, #42 +; movz x5, #42 +; movz x6, #42 +; movz x7, #42 +; strb w15, [sp] +; ldr x15, 8 ; b 12 ; data TestCase { length: 1, ascii: [103, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0] } + 0 +; blr x15 +; add sp, sp, #16 +; virtual_sp_offset_adjust -16 +; ldp fp, lr, [sp], #16 +; ret function %f7(i8) -> i32, i32, i32, i32, i32, i32, i32, i32, i8 sext { block0(v0: i8): @@ -100,18 +142,19 @@ block0(v0: i8): return v1, v1, v1, v1, v1, v1, v1, v1, v0 } -; check: mov x9, x0 -; nextln: mov x8, x1 -; nextln: movz x0, #42 -; nextln: movz x1, #42 -; nextln: movz x2, #42 -; nextln: movz x3, #42 -; nextln: movz x4, #42 -; nextln: movz x5, #42 -; nextln: movz x6, #42 -; nextln: movz x7, #42 -; nextln: sturb w9, [x8] -; nextln: ret +; block0: +; mov x14, x0 +; mov x8, x1 +; movz x0, #42 +; movz x1, #42 +; movz x2, #42 +; movz x3, #42 +; movz x4, #42 +; movz x5, #42 +; movz x6, #42 +; movz x7, #42 +; strb w14, [x8] +; ret function %f8() { fn0 = %g0() -> f32 @@ -131,32 +174,33 @@ block0: return } -; check: stp fp, lr, [sp, #-16]! -; nextln: mov fp, sp -; nextln: sub sp, sp, #48 -; nextln: ldr x0, 8 ; b 12 ; data -; nextln: blr x0 -; nextln: str q0, [sp] -; nextln: ldr x0, 8 ; b 12 ; data -; nextln: blr x0 -; nextln: str q0, [sp, #16] -; nextln: ldr x0, 8 ; b 12 ; data -; nextln: blr x0 -; nextln: str q0, [sp, #32] -; nextln: ldr x0, 8 ; b 12 ; data -; nextln: blr x0 -; nextln: ldr q0, [sp] -; nextln: ldr x0, 8 ; b 12 ; data -; nextln: blr x0 -; nextln: ldr q0, [sp, #16] -; nextln: ldr x0, 8 ; b 12 ; data -; nextln: blr x0 -; nextln: ldr q0, [sp, #32] -; nextln: ldr x0, 8 ; b 12 ; data -; nextln: blr x0 -; nextln: add sp, sp, #48 -; nextln: ldp fp, lr, [sp], #16 -; nextln: ret +; stp fp, lr, [sp, #-16]! 
+; mov fp, sp +; sub sp, sp, #48 +; block0: +; ldr x9, 8 ; b 12 ; data TestCase { length: 2, ascii: [103, 48, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0] } + 0 +; blr x9 +; str q0, [sp] +; ldr x11, 8 ; b 12 ; data TestCase { length: 2, ascii: [103, 49, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0] } + 0 +; blr x11 +; str q0, [sp, #16] +; ldr x13, 8 ; b 12 ; data TestCase { length: 2, ascii: [103, 49, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0] } + 0 +; blr x13 +; str q0, [sp, #32] +; ldr x15, 8 ; b 12 ; data TestCase { length: 2, ascii: [103, 50, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0] } + 0 +; blr x15 +; ldr q0, [sp] +; ldr x1, 8 ; b 12 ; data TestCase { length: 2, ascii: [103, 51, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0] } + 0 +; blr x1 +; ldr q0, [sp, #16] +; ldr x3, 8 ; b 12 ; data TestCase { length: 2, ascii: [103, 52, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0] } + 0 +; blr x3 +; ldr q0, [sp, #32] +; ldr x5, 8 ; b 12 ; data TestCase { length: 2, ascii: [103, 52, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0] } + 0 +; blr x5 +; add sp, sp, #48 +; ldp fp, lr, [sp], #16 +; ret function %f9() { fn0 = %g0() -> i8x16 @@ -174,32 +218,33 @@ block0: return } -; check: stp fp, lr, [sp, #-16]! -; nextln: mov fp, sp -; nextln: sub sp, sp, #48 -; nextln: ldr x0, 8 ; b 12 ; data -; nextln: blr x0 -; nextln: str q0, [sp] -; nextln: ldr x0, 8 ; b 12 ; data -; nextln: blr x0 -; nextln: str q0, [sp, #16] -; nextln: ldr x0, 8 ; b 12 ; data -; nextln: blr x0 -; nextln: str q0, [sp, #32] -; nextln: ldr x0, 8 ; b 12 ; data -; nextln: blr x0 -; nextln: ldr q0, [sp] -; nextln: ldr x0, 8 ; b 12 ; data -; nextln: blr x0 -; nextln: ldr q0, [sp, #16] -; nextln: ldr x0, 8 ; b 12 ; data -; nextln: blr x0 -; nextln: ldr q0, [sp, #32] -; nextln: ldr x0, 8 ; b 12 ; data -; nextln: blr x0 -; nextln: add sp, sp, #48 -; nextln: ldp fp, lr, [sp], #16 -; nextln: ret +; stp fp, lr, [sp, #-16]! +; mov fp, sp +; sub sp, sp, #48 +; block0: +; ldr x9, 8 ; b 12 ; data TestCase { length: 2, ascii: [103, 48, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0] } + 0 +; blr x9 +; str q0, [sp] +; ldr x11, 8 ; b 12 ; data TestCase { length: 2, ascii: [103, 48, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0] } + 0 +; blr x11 +; str q0, [sp, #16] +; ldr x13, 8 ; b 12 ; data TestCase { length: 2, ascii: [103, 48, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0] } + 0 +; blr x13 +; str q0, [sp, #32] +; ldr x15, 8 ; b 12 ; data TestCase { length: 2, ascii: [103, 49, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0] } + 0 +; blr x15 +; ldr q0, [sp] +; ldr x1, 8 ; b 12 ; data TestCase { length: 2, ascii: [103, 50, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0] } + 0 +; blr x1 +; ldr q0, [sp, #16] +; ldr x3, 8 ; b 12 ; data TestCase { length: 2, ascii: [103, 50, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0] } + 0 +; blr x3 +; ldr q0, [sp, #32] +; ldr x5, 8 ; b 12 ; data TestCase { length: 2, ascii: [103, 50, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0] } + 0 +; blr x5 +; add sp, sp, #48 +; ldp fp, lr, [sp], #16 +; ret function %f10() { fn0 = %g0() -> f32 @@ -221,44 +266,43 @@ block0: return } -; check: stp fp, lr, [sp, #-16]! 
-; nextln: mov fp, sp -; nextln: sub sp, sp, #48 -; nextln: ldr x0, 8 ; b 12 ; data -; nextln: blr x0 -; nextln: str q0, [sp] -; nextln: ldr x0, 8 ; b 12 ; data -; nextln: blr x0 -; nextln: str q0, [sp, #16] -; nextln: ldr x0, 8 ; b 12 ; data -; nextln: blr x0 -; nextln: str q0, [sp, #32] -; nextln: ldr x0, 8 ; b 12 ; data -; nextln: blr x0 -; nextln: ldr q0, [sp] -; nextln: ldr x0, 8 ; b 12 ; data -; nextln: blr x0 -; nextln: ldr q0, [sp, #16] -; nextln: ldr x0, 8 ; b 12 ; data -; nextln: blr x0 -; nextln: ldr q0, [sp, #32] -; nextln: ldr x0, 8 ; b 12 ; data -; nextln: blr x0 -; nextln: add sp, sp, #48 -; nextln: ldp fp, lr, [sp], #16 -; nextln: ret +; stp fp, lr, [sp, #-16]! +; mov fp, sp +; sub sp, sp, #48 +; block0: +; ldr x9, 8 ; b 12 ; data TestCase { length: 2, ascii: [103, 48, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0] } + 0 +; blr x9 +; str q0, [sp] +; ldr x11, 8 ; b 12 ; data TestCase { length: 2, ascii: [103, 49, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0] } + 0 +; blr x11 +; str q0, [sp, #16] +; ldr x13, 8 ; b 12 ; data TestCase { length: 2, ascii: [103, 50, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0] } + 0 +; blr x13 +; str q0, [sp, #32] +; ldr x15, 8 ; b 12 ; data TestCase { length: 2, ascii: [103, 51, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0] } + 0 +; blr x15 +; ldr q0, [sp] +; ldr x1, 8 ; b 12 ; data TestCase { length: 2, ascii: [103, 52, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0] } + 0 +; blr x1 +; ldr q0, [sp, #16] +; ldr x3, 8 ; b 12 ; data TestCase { length: 2, ascii: [103, 53, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0] } + 0 +; blr x3 +; ldr q0, [sp, #32] +; ldr x5, 8 ; b 12 ; data TestCase { length: 2, ascii: [103, 54, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0] } + 0 +; blr x5 +; add sp, sp, #48 +; ldp fp, lr, [sp], #16 +; ret - -; i128 tests function %f11(i128, i64) -> i64 { block0(v0: i128, v1: i64): v2, v3 = isplit v0 return v3 } -; check: mov x0, x1 -; nextln: ret - +; block0: +; mov x0, x1 +; ret function %f11_call(i64) -> i64 { fn0 = %f11(i128, i64) -> i64 @@ -270,28 +314,27 @@ block0(v0: i64): return v3 } -; check: stp fp, lr, [sp, #-16]! -; nextln: mov fp, sp -; nextln: mov x1, x0 -; nextln: movz x0, #42 -; nextln: movz x2, #42 -; nextln: ldr x3, 8 ; b 12 ; data -; nextln: blr x3 -; nextln: ldp fp, lr, [sp], #16 -; nextln: ret +; stp fp, lr, [sp, #-16]! +; mov fp, sp +; block0: +; mov x9, x0 +; movz x0, #42 +; mov x1, x9 +; movz x2, #42 +; ldr x14, 8 ; b 12 ; data TestCase { length: 3, ascii: [102, 49, 49, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0] } + 0 +; blr x14 +; ldp fp, lr, [sp], #16 +; ret - -; The AArch64 ABI requires that the i128 argument be aligned -; and to be passed in x2 and x3 function %f12(i64, i128) -> i64 { block0(v0: i64, v1: i128): v2, v3 = isplit v1 return v2 } -; check: mov x0, x2 -; nextln: ret - +; block0: +; mov x0, x2 +; ret function %f12_call(i64) -> i64 { fn0 = %f12(i64, i128) -> i64 @@ -303,29 +346,26 @@ block0(v0: i64): return v3 } -; check: stp fp, lr, [sp, #-16]! -; nextln: mov fp, sp -; nextln: movz x3, #42 -; nextln: mov x2, x0 -; nextln: movz x0, #42 -; nextln: ldr x1, 8 ; b 12 ; data -; nextln: blr x1 -; nextln: ldp fp, lr, [sp], #16 -; nextln: ret +; stp fp, lr, [sp, #-16]! 
+; mov fp, sp +; block0: +; movz x3, #42 +; mov x2, x0 +; movz x0, #42 +; ldr x14, 8 ; b 12 ; data TestCase { length: 3, ascii: [102, 49, 50, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0] } + 0 +; blr x14 +; ldp fp, lr, [sp], #16 +; ret - - -; The Apple AArch64 ABI allows the i128 argument to not be aligned -; and to be passed in x1 and x2 function %f13(i64, i128) -> i64 apple_aarch64 { block0(v0: i64, v1: i128): v2, v3 = isplit v1 return v2 } -; check: mov x0, x1 -; nextln: ret - +; block0: +; mov x0, x1 +; ret function %f13_call(i64) -> i64 apple_aarch64 { fn0 = %f13(i64, i128) -> i64 apple_aarch64 @@ -337,31 +377,29 @@ block0(v0: i64): return v3 } -; check: stp fp, lr, [sp, #-16]! -; nextln: mov fp, sp -; nextln: movz x2, #42 -; nextln: mov x1, x0 -; nextln: movz x0, #42 -; nextln: ldr x3, 8 ; b 12 ; data -; nextln: blr x3 -; nextln: ldp fp, lr, [sp], #16 -; nextln: ret +; stp fp, lr, [sp, #-16]! +; mov fp, sp +; block0: +; movz x2, #42 +; mov x1, x0 +; movz x0, #42 +; ldr x14, 8 ; b 12 ; data TestCase { length: 3, ascii: [102, 49, 51, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0] } + 0 +; blr x14 +; ldp fp, lr, [sp], #16 +; ret - - -; We only have 8 registers to pass data in -; make sure we spill the last argument even though there is one slot available function %f14(i128, i128, i128, i64, i128) -> i128 { block0(v0: i128, v1: i128, v2: i128, v3: i64, v4: i128): return v4 } -; check: stp fp, lr, [sp, #-16]! -; nextln: mov fp, sp -; nextln: ldur x0, [fp, #16] -; nextln: ldur x1, [fp, #24] -; nextln: ldp fp, lr, [sp], #16 -; nextln: ret +; stp fp, lr, [sp, #-16]! +; mov fp, sp +; block0: +; ldr x0, [fp, #16] +; ldr x1, [fp, #24] +; ldp fp, lr, [sp], #16 +; ret function %f14_call(i128, i64) -> i128 { fn0 = %f14(i128, i128, i128, i64, i128) -> i128 @@ -371,50 +409,40 @@ block0(v0: i128, v1: i64): return v2 } -; check: stp fp, lr, [sp, #-16]! -; nextln: mov fp, sp +; stp fp, lr, [sp, #-16]! +; mov fp, sp +; block0: +; mov x14, x2 +; sub sp, sp, #16 +; virtual_sp_offset_adjust 16 +; mov x13, x0 +; mov x15, x1 +; mov x2, x13 +; mov x3, x15 +; mov x4, x13 +; mov x5, x15 +; mov x6, x14 +; str x13, [sp] +; str x15, [sp, #8] +; ldr x7, 8 ; b 12 ; data TestCase { length: 3, ascii: [102, 49, 52, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0] } + 0 +; blr x7 +; add sp, sp, #16 +; virtual_sp_offset_adjust -16 +; ldp fp, lr, [sp], #16 +; ret -; TODO: Some codegen optimization possible here with x0,x1 moving to x7,x8 and then moving back -; nextln: mov x7, x0 -; nextln: mov x8, x1 -; nextln: mov x6, x2 -; nextln: sub sp, sp, #16 -; nextln: virtual_sp_offset_adjust 16 -; nextln: mov x0, x7 -; nextln: mov x1, x8 -; nextln: mov x2, x7 -; nextln: mov x3, x8 -; nextln: mov x4, x7 -; nextln: mov x5, x8 -; nextln: stur x7, [sp] -; nextln: stur x8, [sp, #8] - -; nextln: ldr x7, 8 ; b 12 ; data -; nextln: blr x7 -; nextln: add sp, sp, #16 -; nextln: virtual_sp_offset_adjust -16 - -; nextln: ldp fp, lr, [sp], #16 -; nextln: ret - - - -; We have one register slot available (Similar to %f14), however apple -; allows us to start i128 on non even numbered registers (x7 in this case). -; -; It is unspecified if we can split the i128 into x7 + the stack. -; In practice LLVM does not do this, so we are going to go with that. function %f15(i128, i128, i128, i64, i128) -> i128 apple_aarch64{ block0(v0: i128, v1: i128, v2: i128, v3: i64, v4: i128): return v4 } -; check: stp fp, lr, [sp, #-16]! 
-; nextln: mov fp, sp -; nextln: ldur x0, [fp, #16] -; nextln: ldur x1, [fp, #24] -; nextln: ldp fp, lr, [sp], #16 -; nextln: ret +; stp fp, lr, [sp, #-16]! +; mov fp, sp +; block0: +; ldr x0, [fp, #16] +; ldr x1, [fp, #24] +; ldp fp, lr, [sp], #16 +; ret function %f15_call(i128, i64) -> i128 apple_aarch64 { fn0 = %f15(i128, i128, i128, i64, i128) -> i128 apple_aarch64 @@ -424,31 +452,27 @@ block0(v0: i128, v1: i64): return v2 } -; check: stp fp, lr, [sp, #-16]! -; nextln: mov fp, sp - -; nextln: mov x7, x0 -; nextln: mov x8, x1 -; nextln: mov x6, x2 -; nextln: sub sp, sp, #16 -; nextln: virtual_sp_offset_adjust 16 -; nextln: mov x0, x7 -; nextln: mov x1, x8 -; nextln: mov x2, x7 -; nextln: mov x3, x8 -; nextln: mov x4, x7 -; nextln: mov x5, x8 -; nextln: stur x7, [sp] -; nextln: stur x8, [sp, #8] - -; nextln: ldr x7, 8 ; b 12 ; data -; nextln: blr x7 -; nextln: add sp, sp, #16 -; nextln: virtual_sp_offset_adjust -16 - -; nextln: ldp fp, lr, [sp], #16 -; nextln: ret - +; stp fp, lr, [sp, #-16]! +; mov fp, sp +; block0: +; mov x14, x2 +; sub sp, sp, #16 +; virtual_sp_offset_adjust 16 +; mov x13, x0 +; mov x15, x1 +; mov x2, x13 +; mov x3, x15 +; mov x4, x13 +; mov x5, x15 +; mov x6, x14 +; str x13, [sp] +; str x15, [sp, #8] +; ldr x7, 8 ; b 12 ; data TestCase { length: 3, ascii: [102, 49, 53, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0] } + 0 +; blr x7 +; add sp, sp, #16 +; virtual_sp_offset_adjust -16 +; ldp fp, lr, [sp], #16 +; ret function %f16() -> i32, i32 wasmtime_system_v { block0: @@ -457,9 +481,10 @@ block0: return v0, v1 } -; check: mov x1, x0 -; nextln: movz x0, #0 -; nextln: movz x2, #1 -; nextln: stur w2, [x1] -; nextln: ret +; block0: +; mov x11, x0 +; movz x0, #0 +; movz x7, #1 +; str w7, [x11] +; ret diff --git a/cranelift/filetests/filetests/isa/aarch64/compare_zero.clif b/cranelift/filetests/filetests/isa/aarch64/compare_zero.clif index 1828d811e4..122ea536a4 100644 --- a/cranelift/filetests/filetests/isa/aarch64/compare_zero.clif +++ b/cranelift/filetests/filetests/isa/aarch64/compare_zero.clif @@ -10,14 +10,9 @@ block0(v0: i8x16): return v3 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 2) -; Inst 0: cmeq v0.16b, v0.16b, #0 -; Inst 1: ret -; }} +; block0: +; cmeq v0.16b, v0.16b, #0 +; ret function %f1(i16x8) -> b16x8 { block0(v0: i16x8): @@ -27,14 +22,9 @@ block0(v0: i16x8): return v3 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 2) -; Inst 0: cmeq v0.8h, v0.8h, #0 -; Inst 1: ret -; }} +; block0: +; cmeq v0.8h, v0.8h, #0 +; ret function %f2(i32x4) -> b32x4 { block0(v0: i32x4): @@ -44,15 +34,10 @@ block0(v0: i32x4): return v3 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 3) -; Inst 0: cmeq v0.4s, v0.4s, #0 -; Inst 1: mvn v0.16b, v0.16b -; Inst 2: ret -; }} +; block0: +; cmeq v3.4s, v0.4s, #0 +; mvn v0.16b, v3.16b +; ret function %f3(i64x2) -> b64x2 { block0(v0: i64x2): @@ -62,15 +47,10 @@ block0(v0: i64x2): return v3 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 
3) -; Inst 0: cmeq v0.2d, v0.2d, #0 -; Inst 1: mvn v0.16b, v0.16b -; Inst 2: ret -; }} +; block0: +; cmeq v3.2d, v0.2d, #0 +; mvn v0.16b, v3.16b +; ret function %f4(i8x16) -> b8x16 { block0(v0: i8x16): @@ -80,14 +60,9 @@ block0(v0: i8x16): return v3 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 2) -; Inst 0: cmle v0.16b, v0.16b, #0 -; Inst 1: ret -; }} +; block0: +; cmle v0.16b, v0.16b, #0 +; ret function %f5(i16x8) -> b16x8 { block0(v0: i16x8): @@ -97,14 +72,9 @@ block0(v0: i16x8): return v3 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 2) -; Inst 0: cmge v0.8h, v0.8h, #0 -; Inst 1: ret -; }} +; block0: +; cmge v0.8h, v0.8h, #0 +; ret function %f6(i32x4) -> b32x4 { block0(v0: i32x4): @@ -114,14 +84,9 @@ block0(v0: i32x4): return v3 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 2) -; Inst 0: cmge v0.4s, v0.4s, #0 -; Inst 1: ret -; }} +; block0: +; cmge v0.4s, v0.4s, #0 +; ret function %f7(i64x2) -> b64x2 { block0(v0: i64x2): @@ -131,14 +96,9 @@ block0(v0: i64x2): return v3 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 2) -; Inst 0: cmle v0.2d, v0.2d, #0 -; Inst 1: ret -; }} +; block0: +; cmle v0.2d, v0.2d, #0 +; ret function %f8(i8x16) -> b8x16 { block0(v0: i8x16): @@ -148,14 +108,9 @@ block0(v0: i8x16): return v3 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 2) -; Inst 0: cmlt v0.16b, v0.16b, #0 -; Inst 1: ret -; }} +; block0: +; cmlt v0.16b, v0.16b, #0 +; ret function %f9(i16x8) -> b16x8 { block0(v0: i16x8): @@ -165,14 +120,9 @@ block0(v0: i16x8): return v3 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 2) -; Inst 0: cmgt v0.8h, v0.8h, #0 -; Inst 1: ret -; }} +; block0: +; cmgt v0.8h, v0.8h, #0 +; ret function %f10(i32x4) -> b32x4 { block0(v0: i32x4): @@ -182,14 +132,9 @@ block0(v0: i32x4): return v3 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 2) -; Inst 0: cmgt v0.4s, v0.4s, #0 -; Inst 1: ret -; }} +; block0: +; cmgt v0.4s, v0.4s, #0 +; ret function %f11(i64x2) -> b64x2 { block0(v0: i64x2): @@ -199,14 +144,9 @@ block0(v0: i64x2): return v3 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 2) -; Inst 0: cmlt v0.2d, v0.2d, #0 -; Inst 1: ret -; }} +; block0: +; cmlt v0.2d, v0.2d, #0 +; ret function %f12(f32x4) -> b32x4 { block0(v0: f32x4): @@ -216,14 +156,9 @@ block0(v0: f32x4): return v3 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 2) -; Inst 0: fcmeq v0.4s, v0.4s, #0.0 -; Inst 1: ret -; }} +; block0: +; fcmeq v0.4s, v0.4s, #0.0 +; ret function %f13(f64x2) -> b64x2 { block0(v0: f64x2): @@ -233,14 +168,9 @@ block0(v0: f64x2): return v3 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 2) -; Inst 0: fcmeq v0.2d, v0.2d, #0.0 -; Inst 1: ret -; }} +; block0: +; fcmeq v0.2d, v0.2d, #0.0 +; ret function %f14(f64x2) -> b64x2 { block0(v0: f64x2): @@ -250,15 +180,10 @@ block0(v0: f64x2): return v3 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 
3) -; Inst 0: fcmeq v0.2d, v0.2d, #0.0 -; Inst 1: mvn v0.16b, v0.16b -; Inst 2: ret -; }} +; block0: +; fcmeq v3.2d, v0.2d, #0.0 +; mvn v0.16b, v3.16b +; ret function %f15(f32x4) -> b32x4 { block0(v0: f32x4): @@ -268,15 +193,10 @@ block0(v0: f32x4): return v3 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 3) -; Inst 0: fcmeq v0.4s, v0.4s, #0.0 -; Inst 1: mvn v0.16b, v0.16b -; Inst 2: ret -; }} +; block0: +; fcmeq v3.4s, v0.4s, #0.0 +; mvn v0.16b, v3.16b +; ret function %f16(f32x4) -> b32x4 { block0(v0: f32x4): @@ -286,14 +206,9 @@ block0(v0: f32x4): return v3 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 2) -; Inst 0: fcmle v0.4s, v0.4s, #0.0 -; Inst 1: ret -; }} +; block0: +; fcmle v0.4s, v0.4s, #0.0 +; ret function %f17(f64x2) -> b64x2 { block0(v0: f64x2): @@ -303,14 +218,9 @@ block0(v0: f64x2): return v3 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 2) -; Inst 0: fcmge v0.2d, v0.2d, #0.0 -; Inst 1: ret -; }} +; block0: +; fcmge v0.2d, v0.2d, #0.0 +; ret function %f18(f64x2) -> b64x2 { block0(v0: f64x2): @@ -320,14 +230,9 @@ block0(v0: f64x2): return v3 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 2) -; Inst 0: fcmge v0.2d, v0.2d, #0.0 -; Inst 1: ret -; }} +; block0: +; fcmge v0.2d, v0.2d, #0.0 +; ret function %f19(f32x4) -> b32x4 { block0(v0: f32x4): @@ -337,14 +242,9 @@ block0(v0: f32x4): return v3 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 2) -; Inst 0: fcmle v0.4s, v0.4s, #0.0 -; Inst 1: ret -; }} +; block0: +; fcmle v0.4s, v0.4s, #0.0 +; ret function %f20(f32x4) -> b32x4 { block0(v0: f32x4): @@ -354,14 +254,9 @@ block0(v0: f32x4): return v3 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 2) -; Inst 0: fcmlt v0.4s, v0.4s, #0.0 -; Inst 1: ret -; }} +; block0: +; fcmlt v0.4s, v0.4s, #0.0 +; ret function %f21(f64x2) -> b64x2 { block0(v0: f64x2): @@ -371,14 +266,9 @@ block0(v0: f64x2): return v3 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 2) -; Inst 0: fcmgt v0.2d, v0.2d, #0.0 -; Inst 1: ret -; }} +; block0: +; fcmgt v0.2d, v0.2d, #0.0 +; ret function %f22(f64x2) -> b64x2 { block0(v0: f64x2): @@ -388,14 +278,9 @@ block0(v0: f64x2): return v3 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 2) -; Inst 0: fcmgt v0.2d, v0.2d, #0.0 -; Inst 1: ret -; }} +; block0: +; fcmgt v0.2d, v0.2d, #0.0 +; ret function %f23(f32x4) -> b32x4 { block0(v0: f32x4): @@ -405,11 +290,7 @@ block0(v0: f32x4): return v3 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 
2) -; Inst 0: fcmlt v0.4s, v0.4s, #0.0 -; Inst 1: ret -; }} +; block0: +; fcmlt v0.4s, v0.4s, #0.0 +; ret + diff --git a/cranelift/filetests/filetests/isa/aarch64/condbr.clif b/cranelift/filetests/filetests/isa/aarch64/condbr.clif index 1b61291530..9195757667 100644 --- a/cranelift/filetests/filetests/isa/aarch64/condbr.clif +++ b/cranelift/filetests/filetests/isa/aarch64/condbr.clif @@ -8,15 +8,10 @@ block0(v0: i64, v1: i64): return v2 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 3) -; Inst 0: subs xzr, x0, x1 -; Inst 1: cset x0, eq -; Inst 2: ret -; }} +; block0: +; subs xzr, x0, x1 +; cset x0, eq +; ret function %icmp_eq_i128(i128, i128) -> b1 { block0(v0: i128, v1: i128): @@ -24,17 +19,12 @@ block0(v0: i128, v1: i128): return v2 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 5) -; Inst 0: eor x0, x0, x2 -; Inst 1: eor x1, x1, x3 -; Inst 2: adds xzr, x0, x1 -; Inst 3: cset x0, eq -; Inst 4: ret -; }} +; block0: +; eor x10, x0, x2 +; eor x12, x1, x3 +; adds xzr, x10, x12 +; cset x0, eq +; ret function %icmp_ne_i128(i128, i128) -> b1 { block0(v0: i128, v1: i128): @@ -42,17 +32,12 @@ block0(v0: i128, v1: i128): return v2 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 5) -; Inst 0: eor x0, x0, x2 -; Inst 1: eor x1, x1, x3 -; Inst 2: adds xzr, x0, x1 -; Inst 3: cset x0, ne -; Inst 4: ret -; }} +; block0: +; eor x10, x0, x2 +; eor x12, x1, x3 +; adds xzr, x10, x12 +; cset x0, ne +; ret function %icmp_slt_i128(i128, i128) -> b1 { block0(v0: i128, v1: i128): @@ -60,18 +45,13 @@ block0(v0: i128, v1: i128): return v2 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 6) -; Inst 0: subs xzr, x0, x2 -; Inst 1: cset x0, lo -; Inst 2: subs xzr, x1, x3 -; Inst 3: cset x1, lt -; Inst 4: csel x0, x0, x1, eq -; Inst 5: ret -; }} +; block0: +; subs xzr, x0, x2 +; cset x11, lo +; subs xzr, x1, x3 +; cset x14, lt +; csel x0, x11, x14, eq +; ret function %icmp_ult_i128(i128, i128) -> b1 { block0(v0: i128, v1: i128): @@ -79,18 +59,13 @@ block0(v0: i128, v1: i128): return v2 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 6) -; Inst 0: subs xzr, x0, x2 -; Inst 1: cset x0, lo -; Inst 2: subs xzr, x1, x3 -; Inst 3: cset x1, lo -; Inst 4: csel x0, x0, x1, eq -; Inst 5: ret -; }} +; block0: +; subs xzr, x0, x2 +; cset x11, lo +; subs xzr, x1, x3 +; cset x14, lo +; csel x0, x11, x14, eq +; ret function %icmp_sle_i128(i128, i128) -> b1 { block0(v0: i128, v1: i128): @@ -98,18 +73,13 @@ block0(v0: i128, v1: i128): return v2 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 6) -; Inst 0: subs xzr, x0, x2 -; Inst 1: cset x0, ls -; Inst 2: subs xzr, x1, x3 -; Inst 3: cset x1, le -; Inst 4: csel x0, x0, x1, eq -; Inst 5: ret -; }} +; block0: +; subs xzr, x0, x2 +; cset x11, ls +; subs xzr, x1, x3 +; cset x14, le +; csel x0, x11, x14, eq +; ret function %icmp_ule_i128(i128, i128) -> b1 { block0(v0: i128, v1: i128): @@ -117,18 +87,13 @@ block0(v0: i128, v1: i128): return v2 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 
6) -; Inst 0: subs xzr, x0, x2 -; Inst 1: cset x0, ls -; Inst 2: subs xzr, x1, x3 -; Inst 3: cset x1, ls -; Inst 4: csel x0, x0, x1, eq -; Inst 5: ret -; }} +; block0: +; subs xzr, x0, x2 +; cset x11, ls +; subs xzr, x1, x3 +; cset x14, ls +; csel x0, x11, x14, eq +; ret function %icmp_sgt_i128(i128, i128) -> b1 { block0(v0: i128, v1: i128): @@ -136,18 +101,13 @@ block0(v0: i128, v1: i128): return v2 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 6) -; Inst 0: subs xzr, x0, x2 -; Inst 1: cset x0, hi -; Inst 2: subs xzr, x1, x3 -; Inst 3: cset x1, gt -; Inst 4: csel x0, x0, x1, eq -; Inst 5: ret -; }} +; block0: +; subs xzr, x0, x2 +; cset x11, hi +; subs xzr, x1, x3 +; cset x14, gt +; csel x0, x11, x14, eq +; ret function %icmp_ugt_i128(i128, i128) -> b1 { block0(v0: i128, v1: i128): @@ -155,18 +115,13 @@ block0(v0: i128, v1: i128): return v2 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 6) -; Inst 0: subs xzr, x0, x2 -; Inst 1: cset x0, hi -; Inst 2: subs xzr, x1, x3 -; Inst 3: cset x1, hi -; Inst 4: csel x0, x0, x1, eq -; Inst 5: ret -; }} +; block0: +; subs xzr, x0, x2 +; cset x11, hi +; subs xzr, x1, x3 +; cset x14, hi +; csel x0, x11, x14, eq +; ret function %icmp_sge_i128(i128, i128) -> b1 { block0(v0: i128, v1: i128): @@ -174,18 +129,13 @@ block0(v0: i128, v1: i128): return v2 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 6) -; Inst 0: subs xzr, x0, x2 -; Inst 1: cset x0, hs -; Inst 2: subs xzr, x1, x3 -; Inst 3: cset x1, ge -; Inst 4: csel x0, x0, x1, eq -; Inst 5: ret -; }} +; block0: +; subs xzr, x0, x2 +; cset x11, hs +; subs xzr, x1, x3 +; cset x14, ge +; csel x0, x11, x14, eq +; ret function %icmp_uge_i128(i128, i128) -> b1 { block0(v0: i128, v1: i128): @@ -193,18 +143,13 @@ block0(v0: i128, v1: i128): return v2 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 6) -; Inst 0: subs xzr, x0, x2 -; Inst 1: cset x0, hs -; Inst 2: subs xzr, x1, x3 -; Inst 3: cset x1, hs -; Inst 4: csel x0, x0, x1, eq -; Inst 5: ret -; }} +; block0: +; subs xzr, x0, x2 +; cset x11, hs +; subs xzr, x1, x3 +; cset x14, hs +; csel x0, x11, x14, eq +; ret function %icmp_of_i128(i128, i128) -> b1 { block0(v0: i128, v1: i128): @@ -212,16 +157,11 @@ block0(v0: i128, v1: i128): return v2 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 4) -; Inst 0: adds xzr, x0, x2 -; Inst 1: adcs xzr, x1, x3 -; Inst 2: cset x0, vs -; Inst 3: ret -; }} +; block0: +; adds xzr, x0, x2 +; adcs xzr, x1, x3 +; cset x0, vs +; ret function %icmp_nof_i128(i128, i128) -> b1 { block0(v0: i128, v1: i128): @@ -229,16 +169,11 @@ block0(v0: i128, v1: i128): return v2 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 4) -; Inst 0: adds xzr, x0, x2 -; Inst 1: adcs xzr, x1, x3 -; Inst 2: cset x0, vc -; Inst 3: ret -; }} +; block0: +; adds xzr, x0, x2 +; adcs xzr, x1, x3 +; cset x0, vc +; ret function %f(i64, i64) -> i64 { block0(v0: i64, v1: i64): @@ -255,26 +190,15 @@ block2: return v5 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (successor: Block 1) -; (successor: Block 2) -; (instruction range: 0 .. 
2) -; Inst 0: subs xzr, x0, x1 -; Inst 1: b.eq label1 ; b label2 -; Block 1: -; (original IR block: block1) -; (instruction range: 2 .. 4) -; Inst 2: movz x0, #1 -; Inst 3: ret -; Block 2: -; (original IR block: block2) -; (instruction range: 4 .. 6) -; Inst 4: movz x0, #2 -; Inst 5: ret -; }} +; block0: +; subs xzr, x0, x1 +; b.eq label1 ; b label2 +; block1: +; movz x0, #1 +; ret +; block2: +; movz x0, #2 +; ret function %f(i64, i64) -> i64 { block0(v0: i64, v1: i64): @@ -287,29 +211,16 @@ block1: return v4 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (successor: Block 1) -; (successor: Block 2) -; (instruction range: 0 .. 2) -; Inst 0: subs xzr, x0, x1 -; Inst 1: b.eq label1 ; b label2 -; Block 1: -; (successor: Block 3) -; (instruction range: 2 .. 3) -; Inst 2: b label3 -; Block 2: -; (successor: Block 3) -; (instruction range: 3 .. 4) -; Inst 3: b label3 -; Block 3: -; (original IR block: block1) -; (instruction range: 4 .. 6) -; Inst 4: movz x0, #1 -; Inst 5: ret -; }} +; block0: +; subs xzr, x0, x1 +; b.eq label1 ; b label2 +; block1: +; b label3 +; block2: +; b label3 +; block3: +; movz x0, #1 +; ret function %i128_brz(i128){ block0(v0: i128): @@ -321,28 +232,15 @@ block1: return } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (successor: Block 1) -; (successor: Block 2) -; (instruction range: 0 .. 2) -; Inst 0: orr x0, x0, x1 -; Inst 1: cbz x0, label1 ; b label2 -; Block 1: -; (successor: Block 3) -; (instruction range: 2 .. 3) -; Inst 2: b label3 -; Block 2: -; (successor: Block 3) -; (instruction range: 3 .. 4) -; Inst 3: b label3 -; Block 3: -; (original IR block: block1) -; (instruction range: 4 .. 5) -; Inst 4: ret -; }} +; block0: +; orr x4, x0, x1 +; cbz x4, label1 ; b label2 +; block1: +; b label3 +; block2: +; b label3 +; block3: +; ret function %i128_brnz(i128){ block0(v0: i128): @@ -354,28 +252,15 @@ block1: return } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (successor: Block 1) -; (successor: Block 2) -; (instruction range: 0 .. 2) -; Inst 0: orr x0, x0, x1 -; Inst 1: cbnz x0, label1 ; b label2 -; Block 1: -; (successor: Block 3) -; (instruction range: 2 .. 3) -; Inst 2: b label3 -; Block 2: -; (successor: Block 3) -; (instruction range: 3 .. 4) -; Inst 3: b label3 -; Block 3: -; (original IR block: block1) -; (instruction range: 4 .. 5) -; Inst 4: ret -; }} +; block0: +; orr x4, x0, x1 +; cbnz x4, label1 ; b label2 +; block1: +; b label3 +; block2: +; b label3 +; block3: +; ret function %i128_bricmp_eq(i128, i128) { block0(v0: i128, v1: i128): @@ -386,30 +271,17 @@ block1: return } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (successor: Block 1) -; (successor: Block 2) -; (instruction range: 0 .. 4) -; Inst 0: eor x0, x0, x2 -; Inst 1: eor x1, x1, x3 -; Inst 2: adds xzr, x0, x1 -; Inst 3: b.eq label1 ; b label2 -; Block 1: -; (successor: Block 3) -; (instruction range: 4 .. 5) -; Inst 4: b label3 -; Block 2: -; (successor: Block 3) -; (instruction range: 5 .. 6) -; Inst 5: b label3 -; Block 3: -; (original IR block: block1) -; (instruction range: 6 .. 
7) -; Inst 6: ret -; }} +; block0: +; eor x8, x0, x2 +; eor x10, x1, x3 +; adds xzr, x8, x10 +; b.eq label1 ; b label2 +; block1: +; b label3 +; block2: +; b label3 +; block3: +; ret function %i128_bricmp_ne(i128, i128) { block0(v0: i128, v1: i128): @@ -420,30 +292,17 @@ block1: return } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (successor: Block 1) -; (successor: Block 2) -; (instruction range: 0 .. 4) -; Inst 0: eor x0, x0, x2 -; Inst 1: eor x1, x1, x3 -; Inst 2: adds xzr, x0, x1 -; Inst 3: b.ne label1 ; b label2 -; Block 1: -; (successor: Block 3) -; (instruction range: 4 .. 5) -; Inst 4: b label3 -; Block 2: -; (successor: Block 3) -; (instruction range: 5 .. 6) -; Inst 5: b label3 -; Block 3: -; (original IR block: block1) -; (instruction range: 6 .. 7) -; Inst 6: ret -; }} +; block0: +; eor x8, x0, x2 +; eor x10, x1, x3 +; adds xzr, x8, x10 +; b.ne label1 ; b label2 +; block1: +; b label3 +; block2: +; b label3 +; block3: +; ret function %i128_bricmp_slt(i128, i128) { block0(v0: i128, v1: i128): @@ -454,33 +313,20 @@ block1: return } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (successor: Block 1) -; (successor: Block 2) -; (instruction range: 0 .. 7) -; Inst 0: subs xzr, x0, x2 -; Inst 1: cset x0, lo -; Inst 2: subs xzr, x1, x3 -; Inst 3: cset x1, lt -; Inst 4: csel x0, x0, x1, eq -; Inst 5: subs xzr, xzr, x0 -; Inst 6: b.lt label1 ; b label2 -; Block 1: -; (successor: Block 3) -; (instruction range: 7 .. 8) -; Inst 7: b label3 -; Block 2: -; (successor: Block 3) -; (instruction range: 8 .. 9) -; Inst 8: b label3 -; Block 3: -; (original IR block: block1) -; (instruction range: 9 .. 10) -; Inst 9: ret -; }} +; block0: +; subs xzr, x0, x2 +; cset x9, lo +; subs xzr, x1, x3 +; cset x12, lt +; csel x9, x9, x12, eq +; subs xzr, xzr, x9 +; b.lt label1 ; b label2 +; block1: +; b label3 +; block2: +; b label3 +; block3: +; ret function %i128_bricmp_ult(i128, i128) { block0(v0: i128, v1: i128): @@ -491,33 +337,20 @@ block1: return } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (successor: Block 1) -; (successor: Block 2) -; (instruction range: 0 .. 7) -; Inst 0: subs xzr, x0, x2 -; Inst 1: cset x0, lo -; Inst 2: subs xzr, x1, x3 -; Inst 3: cset x1, lo -; Inst 4: csel x0, x0, x1, eq -; Inst 5: subs xzr, xzr, x0 -; Inst 6: b.lo label1 ; b label2 -; Block 1: -; (successor: Block 3) -; (instruction range: 7 .. 8) -; Inst 7: b label3 -; Block 2: -; (successor: Block 3) -; (instruction range: 8 .. 9) -; Inst 8: b label3 -; Block 3: -; (original IR block: block1) -; (instruction range: 9 .. 10) -; Inst 9: ret -; }} +; block0: +; subs xzr, x0, x2 +; cset x9, lo +; subs xzr, x1, x3 +; cset x12, lo +; csel x9, x9, x12, eq +; subs xzr, xzr, x9 +; b.lo label1 ; b label2 +; block1: +; b label3 +; block2: +; b label3 +; block3: +; ret function %i128_bricmp_sle(i128, i128) { block0(v0: i128, v1: i128): @@ -528,34 +361,21 @@ block1: return } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (successor: Block 1) -; (successor: Block 2) -; (instruction range: 0 .. 8) -; Inst 0: subs xzr, x0, x2 -; Inst 1: cset x0, ls -; Inst 2: subs xzr, x1, x3 -; Inst 3: cset x1, le -; Inst 4: csel x0, x0, x1, eq -; Inst 5: movz x1, #1 -; Inst 6: subs xzr, x1, x0 -; Inst 7: b.le label1 ; b label2 -; Block 1: -; (successor: Block 3) -; (instruction range: 8 .. 9) -; Inst 8: b label3 -; Block 2: -; (successor: Block 3) -; (instruction range: 9 .. 
10) -; Inst 9: b label3 -; Block 3: -; (original IR block: block1) -; (instruction range: 10 .. 11) -; Inst 10: ret -; }} +; block0: +; subs xzr, x0, x2 +; cset x9, ls +; subs xzr, x1, x3 +; cset x12, le +; csel x9, x9, x12, eq +; movz x12, #1 +; subs xzr, x12, x9 +; b.le label1 ; b label2 +; block1: +; b label3 +; block2: +; b label3 +; block3: +; ret function %i128_bricmp_ule(i128, i128) { block0(v0: i128, v1: i128): @@ -566,34 +386,21 @@ block1: return } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (successor: Block 1) -; (successor: Block 2) -; (instruction range: 0 .. 8) -; Inst 0: subs xzr, x0, x2 -; Inst 1: cset x0, ls -; Inst 2: subs xzr, x1, x3 -; Inst 3: cset x1, ls -; Inst 4: csel x0, x0, x1, eq -; Inst 5: movz x1, #1 -; Inst 6: subs xzr, x1, x0 -; Inst 7: b.ls label1 ; b label2 -; Block 1: -; (successor: Block 3) -; (instruction range: 8 .. 9) -; Inst 8: b label3 -; Block 2: -; (successor: Block 3) -; (instruction range: 9 .. 10) -; Inst 9: b label3 -; Block 3: -; (original IR block: block1) -; (instruction range: 10 .. 11) -; Inst 10: ret -; }} +; block0: +; subs xzr, x0, x2 +; cset x9, ls +; subs xzr, x1, x3 +; cset x12, ls +; csel x9, x9, x12, eq +; movz x12, #1 +; subs xzr, x12, x9 +; b.ls label1 ; b label2 +; block1: +; b label3 +; block2: +; b label3 +; block3: +; ret function %i128_bricmp_sgt(i128, i128) { block0(v0: i128, v1: i128): @@ -604,33 +411,20 @@ block1: return } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (successor: Block 1) -; (successor: Block 2) -; (instruction range: 0 .. 7) -; Inst 0: subs xzr, x0, x2 -; Inst 1: cset x0, hi -; Inst 2: subs xzr, x1, x3 -; Inst 3: cset x1, gt -; Inst 4: csel x0, x0, x1, eq -; Inst 5: subs xzr, x0, xzr -; Inst 6: b.gt label1 ; b label2 -; Block 1: -; (successor: Block 3) -; (instruction range: 7 .. 8) -; Inst 7: b label3 -; Block 2: -; (successor: Block 3) -; (instruction range: 8 .. 9) -; Inst 8: b label3 -; Block 3: -; (original IR block: block1) -; (instruction range: 9 .. 10) -; Inst 9: ret -; }} +; block0: +; subs xzr, x0, x2 +; cset x9, hi +; subs xzr, x1, x3 +; cset x12, gt +; csel x9, x9, x12, eq +; subs xzr, x9, xzr +; b.gt label1 ; b label2 +; block1: +; b label3 +; block2: +; b label3 +; block3: +; ret function %i128_bricmp_ugt(i128, i128) { block0(v0: i128, v1: i128): @@ -641,33 +435,20 @@ block1: return } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (successor: Block 1) -; (successor: Block 2) -; (instruction range: 0 .. 7) -; Inst 0: subs xzr, x0, x2 -; Inst 1: cset x0, hi -; Inst 2: subs xzr, x1, x3 -; Inst 3: cset x1, hi -; Inst 4: csel x0, x0, x1, eq -; Inst 5: subs xzr, x0, xzr -; Inst 6: b.hi label1 ; b label2 -; Block 1: -; (successor: Block 3) -; (instruction range: 7 .. 8) -; Inst 7: b label3 -; Block 2: -; (successor: Block 3) -; (instruction range: 8 .. 9) -; Inst 8: b label3 -; Block 3: -; (original IR block: block1) -; (instruction range: 9 .. 10) -; Inst 9: ret -; }} +; block0: +; subs xzr, x0, x2 +; cset x9, hi +; subs xzr, x1, x3 +; cset x12, hi +; csel x9, x9, x12, eq +; subs xzr, x9, xzr +; b.hi label1 ; b label2 +; block1: +; b label3 +; block2: +; b label3 +; block3: +; ret function %i128_bricmp_sge(i128, i128) { block0(v0: i128, v1: i128): @@ -678,34 +459,21 @@ block1: return } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (successor: Block 1) -; (successor: Block 2) -; (instruction range: 0 .. 
8) -; Inst 0: subs xzr, x0, x2 -; Inst 1: cset x0, hs -; Inst 2: subs xzr, x1, x3 -; Inst 3: cset x1, ge -; Inst 4: csel x0, x0, x1, eq -; Inst 5: movz x1, #1 -; Inst 6: subs xzr, x0, x1 -; Inst 7: b.ge label1 ; b label2 -; Block 1: -; (successor: Block 3) -; (instruction range: 8 .. 9) -; Inst 8: b label3 -; Block 2: -; (successor: Block 3) -; (instruction range: 9 .. 10) -; Inst 9: b label3 -; Block 3: -; (original IR block: block1) -; (instruction range: 10 .. 11) -; Inst 10: ret -; }} +; block0: +; subs xzr, x0, x2 +; cset x9, hs +; subs xzr, x1, x3 +; cset x12, ge +; csel x9, x9, x12, eq +; movz x12, #1 +; subs xzr, x9, x12 +; b.ge label1 ; b label2 +; block1: +; b label3 +; block2: +; b label3 +; block3: +; ret function %i128_bricmp_uge(i128, i128) { block0(v0: i128, v1: i128): @@ -716,34 +484,21 @@ block1: return } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (successor: Block 1) -; (successor: Block 2) -; (instruction range: 0 .. 8) -; Inst 0: subs xzr, x0, x2 -; Inst 1: cset x0, hs -; Inst 2: subs xzr, x1, x3 -; Inst 3: cset x1, hs -; Inst 4: csel x0, x0, x1, eq -; Inst 5: movz x1, #1 -; Inst 6: subs xzr, x0, x1 -; Inst 7: b.hs label1 ; b label2 -; Block 1: -; (successor: Block 3) -; (instruction range: 8 .. 9) -; Inst 8: b label3 -; Block 2: -; (successor: Block 3) -; (instruction range: 9 .. 10) -; Inst 9: b label3 -; Block 3: -; (original IR block: block1) -; (instruction range: 10 .. 11) -; Inst 10: ret -; }} +; block0: +; subs xzr, x0, x2 +; cset x9, hs +; subs xzr, x1, x3 +; cset x12, hs +; csel x9, x9, x12, eq +; movz x12, #1 +; subs xzr, x9, x12 +; b.hs label1 ; b label2 +; block1: +; b label3 +; block2: +; b label3 +; block3: +; ret function %i128_bricmp_of(i128, i128) { block0(v0: i128, v1: i128): @@ -754,29 +509,16 @@ block1: return } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (successor: Block 1) -; (successor: Block 2) -; (instruction range: 0 .. 3) -; Inst 0: adds xzr, x0, x2 -; Inst 1: adcs xzr, x1, x3 -; Inst 2: b.vs label1 ; b label2 -; Block 1: -; (successor: Block 3) -; (instruction range: 3 .. 4) -; Inst 3: b label3 -; Block 2: -; (successor: Block 3) -; (instruction range: 4 .. 5) -; Inst 4: b label3 -; Block 3: -; (original IR block: block1) -; (instruction range: 5 .. 6) -; Inst 5: ret -; }} +; block0: +; adds xzr, x0, x2 +; adcs xzr, x1, x3 +; b.vs label1 ; b label2 +; block1: +; b label3 +; block2: +; b label3 +; block3: +; ret function %i128_bricmp_nof(i128, i128) { block0(v0: i128, v1: i128): @@ -787,27 +529,14 @@ block1: return } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (successor: Block 1) -; (successor: Block 2) -; (instruction range: 0 .. 3) -; Inst 0: adds xzr, x0, x2 -; Inst 1: adcs xzr, x1, x3 -; Inst 2: b.vc label1 ; b label2 -; Block 1: -; (successor: Block 3) -; (instruction range: 3 .. 4) -; Inst 3: b label3 -; Block 2: -; (successor: Block 3) -; (instruction range: 4 .. 5) -; Inst 4: b label3 -; Block 3: -; (original IR block: block1) -; (instruction range: 5 .. 
6) -; Inst 5: ret -; }} +; block0: +; adds xzr, x0, x2 +; adcs xzr, x1, x3 +; b.vc label1 ; b label2 +; block1: +; b label3 +; block2: +; b label3 +; block3: +; ret diff --git a/cranelift/filetests/filetests/isa/aarch64/condops.clif b/cranelift/filetests/filetests/isa/aarch64/condops.clif index 9ab7573d95..6813b270e9 100644 --- a/cranelift/filetests/filetests/isa/aarch64/condops.clif +++ b/cranelift/filetests/filetests/isa/aarch64/condops.clif @@ -10,16 +10,11 @@ block0(v0: i8, v1: i64, v2: i64): return v5 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 4) -; Inst 0: uxtb w0, w0 -; Inst 1: subs wzr, w0, #42 -; Inst 2: csel x0, x1, x2, eq -; Inst 3: ret -; }} +; block0: +; uxtb w8, w0 +; subs wzr, w8, #42 +; csel x0, x1, x2, eq +; ret function %g(i8) -> b1 { block0(v0: i8): @@ -29,16 +24,11 @@ block0(v0: i8): return v5 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 4) -; Inst 0: uxtb w0, w0 -; Inst 1: subs wzr, w0, #42 -; Inst 2: cset x0, eq -; Inst 3: ret -; }} +; block0: +; uxtb w4, w0 +; subs wzr, w4, #42 +; cset x0, eq +; ret function %h(i8, i8, i8) -> i8 { block0(v0: i8, v1: i8, v2: i8): @@ -46,16 +36,11 @@ block0(v0: i8, v1: i8, v2: i8): return v3 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 4) -; Inst 0: and x1, x1, x0 -; Inst 1: bic x0, x2, x0 -; Inst 2: orr x0, x0, x1 -; Inst 3: ret -; }} +; block0: +; and x8, x1, x0 +; bic x0, x2, x0 +; orr x0, x0, x8 +; ret function %i(b1, i8, i8) -> i8 { block0(v0: b1, v1: i8, v2: i8): @@ -63,16 +48,11 @@ block0(v0: b1, v1: i8, v2: i8): return v3 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 4) -; Inst 0: and w0, w0, #1 -; Inst 1: subs wzr, w0, wzr -; Inst 2: csel x0, x1, x2, ne -; Inst 3: ret -; }} +; block0: +; and w8, w0, #1 +; subs wzr, w8, wzr +; csel x0, x1, x2, ne +; ret function %i(i32, i8, i8) -> i8 { block0(v0: i32, v1: i8, v2: i8): @@ -82,15 +62,10 @@ block0(v0: i32, v1: i8, v2: i8): return v5 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 3) -; Inst 0: subs wzr, w0, #42 -; Inst 1: csel x0, x1, x2, eq -; Inst 2: ret -; }} +; block0: +; subs wzr, w0, #42 +; csel x0, x1, x2, eq +; ret function %i128_select(b1, i128, i128) -> i128 { block0(v0: b1, v1: i128, v2: i128): @@ -98,15 +73,10 @@ block0(v0: b1, v1: i128, v2: i128): return v3 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 5) -; Inst 0: and w0, w0, #1 -; Inst 1: subs wzr, w0, wzr -; Inst 2: csel x0, x2, x4, ne -; Inst 3: csel x1, x3, x5, ne -; Inst 4: ret -; }} +; block0: +; and w14, w0, #1 +; subs wzr, w14, wzr +; csel x0, x2, x4, ne +; csel x1, x3, x5, ne +; ret diff --git a/cranelift/filetests/filetests/isa/aarch64/constants.clif b/cranelift/filetests/filetests/isa/aarch64/constants.clif index 9357a75c76..130ecdd475 100644 --- a/cranelift/filetests/filetests/isa/aarch64/constants.clif +++ b/cranelift/filetests/filetests/isa/aarch64/constants.clif @@ -8,14 +8,9 @@ block0: return v0 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 
2) -; Inst 0: movz x0, #255 -; Inst 1: ret -; }} +; block0: +; movz x0, #255 +; ret function %f() -> b16 { block0: @@ -23,14 +18,9 @@ block0: return v0 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 2) -; Inst 0: movz x0, #0 -; Inst 1: ret -; }} +; block0: +; movz x0, #0 +; ret function %f() -> i64 { block0: @@ -38,14 +28,9 @@ block0: return v0 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 2) -; Inst 0: movz x0, #0 -; Inst 1: ret -; }} +; block0: +; movz x0, #0 +; ret function %f() -> i64 { block0: @@ -53,14 +38,9 @@ block0: return v0 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 2) -; Inst 0: movz x0, #65535 -; Inst 1: ret -; }} +; block0: +; movz x0, #65535 +; ret function %f() -> i64 { block0: @@ -68,14 +48,9 @@ block0: return v0 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 2) -; Inst 0: movz x0, #65535, LSL #16 -; Inst 1: ret -; }} +; block0: +; movz x0, #65535, LSL #16 +; ret function %f() -> i64 { block0: @@ -83,14 +58,9 @@ block0: return v0 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 2) -; Inst 0: movz x0, #65535, LSL #32 -; Inst 1: ret -; }} +; block0: +; movz x0, #65535, LSL #32 +; ret function %f() -> i64 { block0: @@ -98,14 +68,9 @@ block0: return v0 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 2) -; Inst 0: movz x0, #65535, LSL #48 -; Inst 1: ret -; }} +; block0: +; movz x0, #65535, LSL #48 +; ret function %f() -> i64 { block0: @@ -113,14 +78,9 @@ block0: return v0 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 2) -; Inst 0: movn x0, #0 -; Inst 1: ret -; }} +; block0: +; movn x0, #0 +; ret function %f() -> i64 { block0: @@ -128,14 +88,9 @@ block0: return v0 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 2) -; Inst 0: movn x0, #65535 -; Inst 1: ret -; }} +; block0: +; movn x0, #65535 +; ret function %f() -> i64 { block0: @@ -143,14 +98,9 @@ block0: return v0 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 2) -; Inst 0: movn x0, #65535, LSL #16 -; Inst 1: ret -; }} +; block0: +; movn x0, #65535, LSL #16 +; ret function %f() -> i64 { block0: @@ -158,14 +108,9 @@ block0: return v0 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 2) -; Inst 0: movn x0, #65535, LSL #32 -; Inst 1: ret -; }} +; block0: +; movn x0, #65535, LSL #32 +; ret function %f() -> i64 { block0: @@ -173,14 +118,9 @@ block0: return v0 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 2) -; Inst 0: movn x0, #65535, LSL #48 -; Inst 1: ret -; }} +; block0: +; movn x0, #65535, LSL #48 +; ret function %f() -> i64 { block0: @@ -188,17 +128,12 @@ block0: return v0 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 
5) -; Inst 0: movz x0, #58 -; Inst 1: movk x0, #4626, LSL #16 -; Inst 2: movk x0, #61603, LSL #32 -; Inst 3: movk x0, #62283, LSL #48 -; Inst 4: ret -; }} +; block0: +; movz x0, #58 +; movk x0, #4626, LSL #16 +; movk x0, #61603, LSL #32 +; movk x0, #62283, LSL #48 +; ret function %f() -> i64 { block0: @@ -206,15 +141,10 @@ block0: return v0 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 3) -; Inst 0: movz x0, #7924, LSL #16 -; Inst 1: movk x0, #4841, LSL #48 -; Inst 2: ret -; }} +; block0: +; movz x0, #7924, LSL #16 +; movk x0, #4841, LSL #48 +; ret function %f() -> i64 { block0: @@ -222,15 +152,10 @@ block0: return v0 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 3) -; Inst 0: movn x0, #57611, LSL #16 -; Inst 1: movk x0, #4841, LSL #48 -; Inst 2: ret -; }} +; block0: +; movn x0, #57611, LSL #16 +; movk x0, #4841, LSL #48 +; ret function %f() -> i32 { block0: @@ -238,14 +163,9 @@ block0: return v0 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 2) -; Inst 0: orr x0, xzr, #4294967295 -; Inst 1: ret -; }} +; block0: +; orr x0, xzr, #4294967295 +; ret function %f() -> i32 { block0: @@ -253,14 +173,9 @@ block0: return v0 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 2) -; Inst 0: movn w0, #8 -; Inst 1: ret -; }} +; block0: +; movn w0, #8 +; ret function %f() -> i64 { block0: @@ -268,14 +183,9 @@ block0: return v0 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 2) -; Inst 0: movn w0, #8 -; Inst 1: ret -; }} +; block0: +; movn w0, #8 +; ret function %f() -> i64 { block0: @@ -283,14 +193,9 @@ block0: return v0 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 2) -; Inst 0: movn x0, #8 -; Inst 1: ret -; }} +; block0: +; movn x0, #8 +; ret function %f() -> f64 { block0: @@ -298,14 +203,9 @@ block0: return v0 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 2) -; Inst 0: fmov d0, #1 -; Inst 1: ret -; }} +; block0: +; fmov d0, #1 +; ret function %f() -> f32 { block0: @@ -313,14 +213,9 @@ block0: return v0 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 2) -; Inst 0: fmov s0, #5 -; Inst 1: ret -; }} +; block0: +; fmov s0, #5 +; ret function %f() -> f64 { block0: @@ -328,15 +223,10 @@ block0: return v0 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 3) -; Inst 0: movz x0, #16457, LSL #48 -; Inst 1: fmov d0, x0 -; Inst 2: ret -; }} +; block0: +; movz x2, #16457, LSL #48 +; fmov d0, x2 +; ret function %f() -> f32 { block0: @@ -344,15 +234,10 @@ block0: return v0 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 3) -; Inst 0: movz x0, #16968, LSL #16 -; Inst 1: fmov s0, w0 -; Inst 2: ret -; }} +; block0: +; movz x2, #16968, LSL #16 +; fmov s0, w2 +; ret function %f() -> f64 { block0: @@ -360,14 +245,9 @@ block0: return v0 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 
2) -; Inst 0: movi v0.2s, #0 -; Inst 1: ret -; }} +; block0: +; movi v0.2s, #0 +; ret function %f() -> f32 { block0: @@ -375,14 +255,9 @@ block0: return v0 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 2) -; Inst 0: movi v0.2s, #0 -; Inst 1: ret -; }} +; block0: +; movi v0.2s, #0 +; ret function %f() -> f64 { block0: @@ -390,14 +265,9 @@ block0: return v0 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 2) -; Inst 0: fmov d0, #-16 -; Inst 1: ret -; }} +; block0: +; fmov d0, #-16 +; ret function %f() -> f32 { block0: @@ -405,11 +275,7 @@ block0: return v0 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 2) -; Inst 0: fmov s0, #-16 -; Inst 1: ret -; }} +; block0: +; fmov s0, #-16 +; ret + diff --git a/cranelift/filetests/filetests/isa/aarch64/extend-op.clif b/cranelift/filetests/filetests/isa/aarch64/extend-op.clif index 4e80cfe7b8..9b31cd20fc 100644 --- a/cranelift/filetests/filetests/isa/aarch64/extend-op.clif +++ b/cranelift/filetests/filetests/isa/aarch64/extend-op.clif @@ -10,15 +10,10 @@ block0(v0: i8): return v3 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 3) -; Inst 0: sxtb x0, w0 -; Inst 1: add x0, x0, #42 -; Inst 2: ret -; }} +; block0: +; sxtb x4, w0 +; add x0, x4, #42 +; ret function %f2(i8, i64) -> i64 { block0(v0: i8, v1: i64): @@ -27,14 +22,9 @@ block0(v0: i8, v1: i64): return v3 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 2) -; Inst 0: add x0, x1, x0, SXTB -; Inst 1: ret -; }} +; block0: +; add x0, x1, x0, SXTB +; ret function %i128_uextend_i64(i64) -> i128 { block0(v0: i64): @@ -42,14 +32,9 @@ block0(v0: i64): return v1 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 2) -; Inst 0: movz x1, #0 -; Inst 1: ret -; }} +; block0: +; movz x1, #0 +; ret function %i128_sextend_i64(i64) -> i128 { block0(v0: i64): @@ -57,14 +42,9 @@ block0(v0: i64): return v1 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 2) -; Inst 0: asr x1, x0, #63 -; Inst 1: ret -; }} +; block0: +; asr x1, x0, #63 +; ret function %i128_uextend_i32(i32) -> i128 { block0(v0: i32): @@ -72,15 +52,10 @@ block0(v0: i32): return v1 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 3) -; Inst 0: mov w0, w0 -; Inst 1: movz x1, #0 -; Inst 2: ret -; }} +; block0: +; mov w0, w0 +; movz x1, #0 +; ret function %i128_sextend_i32(i32) -> i128 { block0(v0: i32): @@ -88,15 +63,10 @@ block0(v0: i32): return v1 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 3) -; Inst 0: sxtw x0, w0 -; Inst 1: asr x1, x0, #63 -; Inst 2: ret -; }} +; block0: +; sxtw x0, w0 +; asr x1, x0, #63 +; ret function %i128_uextend_i16(i16) -> i128 { block0(v0: i16): @@ -104,15 +74,10 @@ block0(v0: i16): return v1 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 
3) -; Inst 0: uxth w0, w0 -; Inst 1: movz x1, #0 -; Inst 2: ret -; }} +; block0: +; uxth w0, w0 +; movz x1, #0 +; ret function %i128_sextend_i16(i16) -> i128 { block0(v0: i16): @@ -120,15 +85,10 @@ block0(v0: i16): return v1 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 3) -; Inst 0: sxth x0, w0 -; Inst 1: asr x1, x0, #63 -; Inst 2: ret -; }} +; block0: +; sxth x0, w0 +; asr x1, x0, #63 +; ret function %i128_uextend_i8(i8) -> i128 { block0(v0: i8): @@ -136,15 +96,10 @@ block0(v0: i8): return v1 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 3) -; Inst 0: uxtb w0, w0 -; Inst 1: movz x1, #0 -; Inst 2: ret -; }} +; block0: +; uxtb w0, w0 +; movz x1, #0 +; ret function %i128_sextend_i8(i8) -> i128 { block0(v0: i8): @@ -152,15 +107,10 @@ block0(v0: i8): return v1 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 3) -; Inst 0: sxtb x0, w0 -; Inst 1: asr x1, x0, #63 -; Inst 2: ret -; }} +; block0: +; sxtb x0, w0 +; asr x1, x0, #63 +; ret function %i8x16_uextend_i16(i8x16) -> i16 { block0(v0: i8x16): @@ -169,14 +119,9 @@ block0(v0: i8x16): return v2 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 2) -; Inst 0: umov w0, v0.b[1] -; Inst 1: ret -; }} +; block0: +; umov w0, v0.b[1] +; ret function %i8x16_uextend_i32(i8x16) -> i32 { block0(v0: i8x16): @@ -185,14 +130,9 @@ block0(v0: i8x16): return v2 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 2) -; Inst 0: umov w0, v0.b[1] -; Inst 1: ret -; }} +; block0: +; umov w0, v0.b[1] +; ret function %i8x16_uextend_i64(i8x16) -> i64 { block0(v0: i8x16): @@ -201,14 +141,9 @@ block0(v0: i8x16): return v2 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 2) -; Inst 0: umov w0, v0.b[1] -; Inst 1: ret -; }} +; block0: +; umov w0, v0.b[1] +; ret function %i8x16_uextend_i128(i8x16) -> i128 { block0(v0: i8x16): @@ -217,15 +152,10 @@ block0(v0: i8x16): return v2 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 3) -; Inst 0: umov w0, v0.b[1] -; Inst 1: movz x1, #0 -; Inst 2: ret -; }} +; block0: +; umov w0, v0.b[1] +; movz x1, #0 +; ret function %i8x16_sextend_i16(i8x16) -> i16 { block0(v0: i8x16): @@ -234,14 +164,9 @@ block0(v0: i8x16): return v2 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 2) -; Inst 0: smov w0, v0.b[1] -; Inst 1: ret -; }} +; block0: +; smov w0, v0.b[1] +; ret function %i8x16_sextend_i32(i8x16) -> i32 { block0(v0: i8x16): @@ -250,14 +175,9 @@ block0(v0: i8x16): return v2 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 2) -; Inst 0: smov w0, v0.b[1] -; Inst 1: ret -; }} +; block0: +; smov w0, v0.b[1] +; ret function %i8x16_sextend_i64(i8x16) -> i64 { block0(v0: i8x16): @@ -266,14 +186,9 @@ block0(v0: i8x16): return v2 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 
2) -; Inst 0: smov x0, v0.b[1] -; Inst 1: ret -; }} +; block0: +; smov x0, v0.b[1] +; ret function %i8x16_sextend_i128(i8x16) -> i128 { block0(v0: i8x16): @@ -282,15 +197,10 @@ block0(v0: i8x16): return v2 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 3) -; Inst 0: smov x0, v0.b[1] -; Inst 1: asr x1, x0, #63 -; Inst 2: ret -; }} +; block0: +; smov x0, v0.b[1] +; asr x1, x0, #63 +; ret function %i16x8_uextend_i32(i16x8) -> i32 { block0(v0: i16x8): @@ -299,14 +209,9 @@ block0(v0: i16x8): return v2 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 2) -; Inst 0: umov w0, v0.h[1] -; Inst 1: ret -; }} +; block0: +; umov w0, v0.h[1] +; ret function %i16x8_uextend_i64(i16x8) -> i64 { block0(v0: i16x8): @@ -315,14 +220,9 @@ block0(v0: i16x8): return v2 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 2) -; Inst 0: umov w0, v0.h[1] -; Inst 1: ret -; }} +; block0: +; umov w0, v0.h[1] +; ret function %i16x8_uextend_i128(i16x8) -> i128 { block0(v0: i16x8): @@ -331,15 +231,10 @@ block0(v0: i16x8): return v2 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 3) -; Inst 0: umov w0, v0.h[1] -; Inst 1: movz x1, #0 -; Inst 2: ret -; }} +; block0: +; umov w0, v0.h[1] +; movz x1, #0 +; ret function %i16x8_sextend_i32(i16x8) -> i32 { block0(v0: i16x8): @@ -348,14 +243,9 @@ block0(v0: i16x8): return v2 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 2) -; Inst 0: smov w0, v0.h[1] -; Inst 1: ret -; }} +; block0: +; smov w0, v0.h[1] +; ret function %i16x8_sextend_i64(i16x8) -> i64 { block0(v0: i16x8): @@ -364,14 +254,9 @@ block0(v0: i16x8): return v2 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 2) -; Inst 0: smov x0, v0.h[1] -; Inst 1: ret -; }} +; block0: +; smov x0, v0.h[1] +; ret function %i16x8_sextend_i128(i16x8) -> i128 { block0(v0: i16x8): @@ -380,15 +265,10 @@ block0(v0: i16x8): return v2 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 3) -; Inst 0: smov x0, v0.h[1] -; Inst 1: asr x1, x0, #63 -; Inst 2: ret -; }} +; block0: +; smov x0, v0.h[1] +; asr x1, x0, #63 +; ret function %i32x4_uextend_i64(i32x4) -> i64 { block0(v0: i32x4): @@ -397,14 +277,9 @@ block0(v0: i32x4): return v2 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 2) -; Inst 0: mov w0, v0.s[1] -; Inst 1: ret -; }} +; block0: +; mov w0, v0.s[1] +; ret function %i32x4_uextend_i128(i32x4) -> i128 { block0(v0: i32x4): @@ -413,15 +288,10 @@ block0(v0: i32x4): return v2 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 3) -; Inst 0: mov w0, v0.s[1] -; Inst 1: movz x1, #0 -; Inst 2: ret -; }} +; block0: +; mov w0, v0.s[1] +; movz x1, #0 +; ret function %i32x4_sextend_i64(i32x4) -> i64 { block0(v0: i32x4): @@ -430,14 +300,9 @@ block0(v0: i32x4): return v2 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 
2) -; Inst 0: smov x0, v0.s[1] -; Inst 1: ret -; }} +; block0: +; smov x0, v0.s[1] +; ret function %i32x4_sextend_i128(i32x4) -> i128 { block0(v0: i32x4): @@ -446,15 +311,10 @@ block0(v0: i32x4): return v2 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 3) -; Inst 0: smov x0, v0.s[1] -; Inst 1: asr x1, x0, #63 -; Inst 2: ret -; }} +; block0: +; smov x0, v0.s[1] +; asr x1, x0, #63 +; ret function %i64x2_uextend_i128(i64x2) -> i128 { block0(v0: i64x2): @@ -463,15 +323,10 @@ block0(v0: i64x2): return v2 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 3) -; Inst 0: mov x0, v0.d[1] -; Inst 1: movz x1, #0 -; Inst 2: ret -; }} +; block0: +; mov x0, v0.d[1] +; movz x1, #0 +; ret function %i64x2_sextend_i128(i64x2) -> i128 { block0(v0: i64x2): @@ -480,13 +335,8 @@ block0(v0: i64x2): return v2 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 3) -; Inst 0: mov x0, v0.d[1] -; Inst 1: asr x1, x0, #63 -; Inst 2: ret -; }} +; block0: +; mov x0, v0.d[1] +; asr x1, x0, #63 +; ret diff --git a/cranelift/filetests/filetests/isa/aarch64/fcvt-small.clif b/cranelift/filetests/filetests/isa/aarch64/fcvt-small.clif index d35b6ef54d..0755c94feb 100644 --- a/cranelift/filetests/filetests/isa/aarch64/fcvt-small.clif +++ b/cranelift/filetests/filetests/isa/aarch64/fcvt-small.clif @@ -8,15 +8,10 @@ block0(v0: i8): return v1 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 3) -; Inst 0: uxtb w0, w0 -; Inst 1: ucvtf s0, w0 -; Inst 2: ret -; }} +; block0: +; uxtb w4, w0 +; ucvtf s0, w4 +; ret function u0:0(i8) -> f64 { block0(v0: i8): @@ -24,15 +19,10 @@ block0(v0: i8): return v1 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 3) -; Inst 0: uxtb w0, w0 -; Inst 1: ucvtf d0, w0 -; Inst 2: ret -; }} +; block0: +; uxtb w4, w0 +; ucvtf d0, w4 +; ret function u0:0(i16) -> f32 { block0(v0: i16): @@ -40,15 +30,10 @@ block0(v0: i16): return v1 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 3) -; Inst 0: uxth w0, w0 -; Inst 1: ucvtf s0, w0 -; Inst 2: ret -; }} +; block0: +; uxth w4, w0 +; ucvtf s0, w4 +; ret function u0:0(i16) -> f64 { block0(v0: i16): @@ -56,15 +41,10 @@ block0(v0: i16): return v1 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 3) -; Inst 0: uxth w0, w0 -; Inst 1: ucvtf d0, w0 -; Inst 2: ret -; }} +; block0: +; uxth w4, w0 +; ucvtf d0, w4 +; ret function u0:0(f32) -> i8 { block0(v0: f32): @@ -72,23 +52,18 @@ block0(v0: f32): return v1 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 
11) -; Inst 0: fcmp s0, s0 -; Inst 1: b.vc 8 ; udf -; Inst 2: fmov s1, #-1 -; Inst 3: fcmp s0, s1 -; Inst 4: b.gt 8 ; udf -; Inst 5: movz x0, #17280, LSL #16 -; Inst 6: fmov s1, w0 -; Inst 7: fcmp s0, s1 -; Inst 8: b.mi 8 ; udf -; Inst 9: fcvtzu w0, s0 -; Inst 10: ret -; }} +; block0: +; fcmp s0, s0 +; b.vc 8 ; udf +; fmov s6, #-1 +; fcmp s0, s6 +; b.gt 8 ; udf +; movz x10, #17280, LSL #16 +; fmov s6, w10 +; fcmp s0, s6 +; b.mi 8 ; udf +; fcvtzu w0, s0 +; ret function u0:0(f64) -> i8 { block0(v0: f64): @@ -96,23 +71,18 @@ block0(v0: f64): return v1 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 11) -; Inst 0: fcmp d0, d0 -; Inst 1: b.vc 8 ; udf -; Inst 2: fmov d1, #-1 -; Inst 3: fcmp d0, d1 -; Inst 4: b.gt 8 ; udf -; Inst 5: movz x0, #16496, LSL #48 -; Inst 6: fmov d1, x0 -; Inst 7: fcmp d0, d1 -; Inst 8: b.mi 8 ; udf -; Inst 9: fcvtzu w0, d0 -; Inst 10: ret -; }} +; block0: +; fcmp d0, d0 +; b.vc 8 ; udf +; fmov d6, #-1 +; fcmp d0, d6 +; b.gt 8 ; udf +; movz x10, #16496, LSL #48 +; fmov d6, x10 +; fcmp d0, d6 +; b.mi 8 ; udf +; fcvtzu w0, d0 +; ret function u0:0(f32) -> i16 { block0(v0: f32): @@ -120,23 +90,18 @@ block0(v0: f32): return v1 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 11) -; Inst 0: fcmp s0, s0 -; Inst 1: b.vc 8 ; udf -; Inst 2: fmov s1, #-1 -; Inst 3: fcmp s0, s1 -; Inst 4: b.gt 8 ; udf -; Inst 5: movz x0, #18304, LSL #16 -; Inst 6: fmov s1, w0 -; Inst 7: fcmp s0, s1 -; Inst 8: b.mi 8 ; udf -; Inst 9: fcvtzu w0, s0 -; Inst 10: ret -; }} +; block0: +; fcmp s0, s0 +; b.vc 8 ; udf +; fmov s6, #-1 +; fcmp s0, s6 +; b.gt 8 ; udf +; movz x10, #18304, LSL #16 +; fmov s6, w10 +; fcmp s0, s6 +; b.mi 8 ; udf +; fcvtzu w0, s0 +; ret function u0:0(f64) -> i16 { block0(v0: f64): @@ -144,21 +109,16 @@ block0(v0: f64): return v1 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 11) -; Inst 0: fcmp d0, d0 -; Inst 1: b.vc 8 ; udf -; Inst 2: fmov d1, #-1 -; Inst 3: fcmp d0, d1 -; Inst 4: b.gt 8 ; udf -; Inst 5: movz x0, #16624, LSL #48 -; Inst 6: fmov d1, x0 -; Inst 7: fcmp d0, d1 -; Inst 8: b.mi 8 ; udf -; Inst 9: fcvtzu w0, d0 -; Inst 10: ret -; }} +; block0: +; fcmp d0, d0 +; b.vc 8 ; udf +; fmov d6, #-1 +; fcmp d0, d6 +; b.gt 8 ; udf +; movz x10, #16624, LSL #48 +; fmov d6, x10 +; fcmp d0, d6 +; b.mi 8 ; udf +; fcvtzu w0, d0 +; ret diff --git a/cranelift/filetests/filetests/isa/aarch64/floating-point.clif b/cranelift/filetests/filetests/isa/aarch64/floating-point.clif index 16b34f759f..a8c3cb8191 100644 --- a/cranelift/filetests/filetests/isa/aarch64/floating-point.clif +++ b/cranelift/filetests/filetests/isa/aarch64/floating-point.clif @@ -8,14 +8,9 @@ block0(v0: f32, v1: f32): return v2 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 2) -; Inst 0: fadd s0, s0, s1 -; Inst 1: ret -; }} +; block0: +; fadd s0, s0, s1 +; ret function %f2(f64, f64) -> f64 { block0(v0: f64, v1: f64): @@ -23,14 +18,9 @@ block0(v0: f64, v1: f64): return v2 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 
2) -; Inst 0: fadd d0, d0, d1 -; Inst 1: ret -; }} +; block0: +; fadd d0, d0, d1 +; ret function %f3(f32, f32) -> f32 { block0(v0: f32, v1: f32): @@ -38,14 +28,9 @@ block0(v0: f32, v1: f32): return v2 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 2) -; Inst 0: fsub s0, s0, s1 -; Inst 1: ret -; }} +; block0: +; fsub s0, s0, s1 +; ret function %f4(f64, f64) -> f64 { block0(v0: f64, v1: f64): @@ -53,14 +38,9 @@ block0(v0: f64, v1: f64): return v2 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 2) -; Inst 0: fsub d0, d0, d1 -; Inst 1: ret -; }} +; block0: +; fsub d0, d0, d1 +; ret function %f5(f32, f32) -> f32 { block0(v0: f32, v1: f32): @@ -68,14 +48,9 @@ block0(v0: f32, v1: f32): return v2 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 2) -; Inst 0: fmul s0, s0, s1 -; Inst 1: ret -; }} +; block0: +; fmul s0, s0, s1 +; ret function %f6(f64, f64) -> f64 { block0(v0: f64, v1: f64): @@ -83,14 +58,9 @@ block0(v0: f64, v1: f64): return v2 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 2) -; Inst 0: fmul d0, d0, d1 -; Inst 1: ret -; }} +; block0: +; fmul d0, d0, d1 +; ret function %f7(f32, f32) -> f32 { block0(v0: f32, v1: f32): @@ -98,14 +68,9 @@ block0(v0: f32, v1: f32): return v2 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 2) -; Inst 0: fdiv s0, s0, s1 -; Inst 1: ret -; }} +; block0: +; fdiv s0, s0, s1 +; ret function %f8(f64, f64) -> f64 { block0(v0: f64, v1: f64): @@ -113,14 +78,9 @@ block0(v0: f64, v1: f64): return v2 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 2) -; Inst 0: fdiv d0, d0, d1 -; Inst 1: ret -; }} +; block0: +; fdiv d0, d0, d1 +; ret function %f9(f32, f32) -> f32 { block0(v0: f32, v1: f32): @@ -128,14 +88,9 @@ block0(v0: f32, v1: f32): return v2 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 2) -; Inst 0: fmin s0, s0, s1 -; Inst 1: ret -; }} +; block0: +; fmin s0, s0, s1 +; ret function %f10(f64, f64) -> f64 { block0(v0: f64, v1: f64): @@ -143,14 +98,9 @@ block0(v0: f64, v1: f64): return v2 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 2) -; Inst 0: fmin d0, d0, d1 -; Inst 1: ret -; }} +; block0: +; fmin d0, d0, d1 +; ret function %f11(f32, f32) -> f32 { block0(v0: f32, v1: f32): @@ -158,14 +108,9 @@ block0(v0: f32, v1: f32): return v2 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 2) -; Inst 0: fmax s0, s0, s1 -; Inst 1: ret -; }} +; block0: +; fmax s0, s0, s1 +; ret function %f12(f64, f64) -> f64 { block0(v0: f64, v1: f64): @@ -173,14 +118,9 @@ block0(v0: f64, v1: f64): return v2 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 2) -; Inst 0: fmax d0, d0, d1 -; Inst 1: ret -; }} +; block0: +; fmax d0, d0, d1 +; ret function %f13(f32) -> f32 { block0(v0: f32): @@ -188,14 +128,9 @@ block0(v0: f32): return v1 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 
2) -; Inst 0: fsqrt s0, s0 -; Inst 1: ret -; }} +; block0: +; fsqrt s0, s0 +; ret function %f15(f64) -> f64 { block0(v0: f64): @@ -203,14 +138,9 @@ block0(v0: f64): return v1 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 2) -; Inst 0: fsqrt d0, d0 -; Inst 1: ret -; }} +; block0: +; fsqrt d0, d0 +; ret function %f16(f32) -> f32 { block0(v0: f32): @@ -218,14 +148,9 @@ block0(v0: f32): return v1 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 2) -; Inst 0: fabs s0, s0 -; Inst 1: ret -; }} +; block0: +; fabs s0, s0 +; ret function %f17(f64) -> f64 { block0(v0: f64): @@ -233,14 +158,9 @@ block0(v0: f64): return v1 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 2) -; Inst 0: fabs d0, d0 -; Inst 1: ret -; }} +; block0: +; fabs d0, d0 +; ret function %f18(f32) -> f32 { block0(v0: f32): @@ -248,14 +168,9 @@ block0(v0: f32): return v1 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 2) -; Inst 0: fneg s0, s0 -; Inst 1: ret -; }} +; block0: +; fneg s0, s0 +; ret function %f19(f64) -> f64 { block0(v0: f64): @@ -263,14 +178,9 @@ block0(v0: f64): return v1 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 2) -; Inst 0: fneg d0, d0 -; Inst 1: ret -; }} +; block0: +; fneg d0, d0 +; ret function %f20(f32) -> f64 { block0(v0: f32): @@ -278,14 +188,9 @@ block0(v0: f32): return v1 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 2) -; Inst 0: fcvt d0, s0 -; Inst 1: ret -; }} +; block0: +; fcvt d0, s0 +; ret function %f21(f64) -> f32 { block0(v0: f64): @@ -293,14 +198,9 @@ block0(v0: f64): return v1 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 2) -; Inst 0: fcvt s0, d0 -; Inst 1: ret -; }} +; block0: +; fcvt s0, d0 +; ret function %f22(f32) -> f32 { block0(v0: f32): @@ -308,14 +208,9 @@ block0(v0: f32): return v1 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 2) -; Inst 0: frintp s0, s0 -; Inst 1: ret -; }} +; block0: +; frintp s0, s0 +; ret function %f22(f64) -> f64 { block0(v0: f64): @@ -323,14 +218,9 @@ block0(v0: f64): return v1 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 2) -; Inst 0: frintp d0, d0 -; Inst 1: ret -; }} +; block0: +; frintp d0, d0 +; ret function %f23(f32) -> f32 { block0(v0: f32): @@ -338,14 +228,9 @@ block0(v0: f32): return v1 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 2) -; Inst 0: frintm s0, s0 -; Inst 1: ret -; }} +; block0: +; frintm s0, s0 +; ret function %f24(f64) -> f64 { block0(v0: f64): @@ -353,14 +238,9 @@ block0(v0: f64): return v1 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 2) -; Inst 0: frintm d0, d0 -; Inst 1: ret -; }} +; block0: +; frintm d0, d0 +; ret function %f25(f32) -> f32 { block0(v0: f32): @@ -368,14 +248,9 @@ block0(v0: f32): return v1 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 
2) -; Inst 0: frintz s0, s0 -; Inst 1: ret -; }} +; block0: +; frintz s0, s0 +; ret function %f26(f64) -> f64 { block0(v0: f64): @@ -383,14 +258,9 @@ block0(v0: f64): return v1 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 2) -; Inst 0: frintz d0, d0 -; Inst 1: ret -; }} +; block0: +; frintz d0, d0 +; ret function %f27(f32) -> f32 { block0(v0: f32): @@ -398,14 +268,9 @@ block0(v0: f32): return v1 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 2) -; Inst 0: frintn s0, s0 -; Inst 1: ret -; }} +; block0: +; frintn s0, s0 +; ret function %f28(f64) -> f64 { block0(v0: f64): @@ -413,14 +278,9 @@ block0(v0: f64): return v1 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 2) -; Inst 0: frintn d0, d0 -; Inst 1: ret -; }} +; block0: +; frintn d0, d0 +; ret function %f29(f32, f32, f32) -> f32 { block0(v0: f32, v1: f32, v2: f32): @@ -428,14 +288,9 @@ block0(v0: f32, v1: f32, v2: f32): return v3 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 2) -; Inst 0: fmadd s0, s0, s1, s2 -; Inst 1: ret -; }} +; block0: +; fmadd s0, s0, s1, s2 +; ret function %f30(f64, f64, f64) -> f64 { block0(v0: f64, v1: f64, v2: f64): @@ -443,14 +298,9 @@ block0(v0: f64, v1: f64, v2: f64): return v3 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 2) -; Inst 0: fmadd d0, d0, d1, d2 -; Inst 1: ret -; }} +; block0: +; fmadd d0, d0, d1, d2 +; ret function %f31(f32, f32) -> f32 { block0(v0: f32, v1: f32): @@ -458,15 +308,10 @@ block0(v0: f32, v1: f32): return v2 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 3) -; Inst 0: ushr v1.2s, v1.2s, #31 -; Inst 1: sli v0.2s, v1.2s, #31 -; Inst 2: ret -; }} +; block0: +; ushr v7.2s, v1.2s, #31 +; sli v0.2s, v7.2s, #31 +; ret function %f32(f64, f64) -> f64 { block0(v0: f64, v1: f64): @@ -474,15 +319,10 @@ block0(v0: f64, v1: f64): return v2 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 3) -; Inst 0: ushr d1, d1, #63 -; Inst 1: sli d0, d1, #63 -; Inst 2: ret -; }} +; block0: +; ushr d7, d1, #63 +; sli d0, d7, #63 +; ret function %f33(f32) -> i32 { block0(v0: f32): @@ -490,23 +330,18 @@ block0(v0: f32): return v1 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 11) -; Inst 0: fcmp s0, s0 -; Inst 1: b.vc 8 ; udf -; Inst 2: fmov s1, #-1 -; Inst 3: fcmp s0, s1 -; Inst 4: b.gt 8 ; udf -; Inst 5: movz x0, #20352, LSL #16 -; Inst 6: fmov s1, w0 -; Inst 7: fcmp s0, s1 -; Inst 8: b.mi 8 ; udf -; Inst 9: fcvtzu w0, s0 -; Inst 10: ret -; }} +; block0: +; fcmp s0, s0 +; b.vc 8 ; udf +; fmov s6, #-1 +; fcmp s0, s6 +; b.gt 8 ; udf +; movz x10, #20352, LSL #16 +; fmov s6, w10 +; fcmp s0, s6 +; b.mi 8 ; udf +; fcvtzu w0, s0 +; ret function %f34(f32) -> i32 { block0(v0: f32): @@ -514,24 +349,19 @@ block0(v0: f32): return v1 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 
12) -; Inst 0: fcmp s0, s0 -; Inst 1: b.vc 8 ; udf -; Inst 2: movz x0, #52992, LSL #16 -; Inst 3: fmov s1, w0 -; Inst 4: fcmp s0, s1 -; Inst 5: b.ge 8 ; udf -; Inst 6: movz x0, #20224, LSL #16 -; Inst 7: fmov s1, w0 -; Inst 8: fcmp s0, s1 -; Inst 9: b.mi 8 ; udf -; Inst 10: fcvtzs w0, s0 -; Inst 11: ret -; }} +; block0: +; fcmp s0, s0 +; b.vc 8 ; udf +; movz x7, #52992, LSL #16 +; fmov s7, w7 +; fcmp s0, s7 +; b.ge 8 ; udf +; movz x12, #20224, LSL #16 +; fmov s7, w12 +; fcmp s0, s7 +; b.mi 8 ; udf +; fcvtzs w0, s0 +; ret function %f35(f32) -> i64 { block0(v0: f32): @@ -539,23 +369,18 @@ block0(v0: f32): return v1 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 11) -; Inst 0: fcmp s0, s0 -; Inst 1: b.vc 8 ; udf -; Inst 2: fmov s1, #-1 -; Inst 3: fcmp s0, s1 -; Inst 4: b.gt 8 ; udf -; Inst 5: movz x0, #24448, LSL #16 -; Inst 6: fmov s1, w0 -; Inst 7: fcmp s0, s1 -; Inst 8: b.mi 8 ; udf -; Inst 9: fcvtzu x0, s0 -; Inst 10: ret -; }} +; block0: +; fcmp s0, s0 +; b.vc 8 ; udf +; fmov s6, #-1 +; fcmp s0, s6 +; b.gt 8 ; udf +; movz x10, #24448, LSL #16 +; fmov s6, w10 +; fcmp s0, s6 +; b.mi 8 ; udf +; fcvtzu x0, s0 +; ret function %f36(f32) -> i64 { block0(v0: f32): @@ -563,24 +388,19 @@ block0(v0: f32): return v1 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 12) -; Inst 0: fcmp s0, s0 -; Inst 1: b.vc 8 ; udf -; Inst 2: movz x0, #57088, LSL #16 -; Inst 3: fmov s1, w0 -; Inst 4: fcmp s0, s1 -; Inst 5: b.ge 8 ; udf -; Inst 6: movz x0, #24320, LSL #16 -; Inst 7: fmov s1, w0 -; Inst 8: fcmp s0, s1 -; Inst 9: b.mi 8 ; udf -; Inst 10: fcvtzs x0, s0 -; Inst 11: ret -; }} +; block0: +; fcmp s0, s0 +; b.vc 8 ; udf +; movz x7, #57088, LSL #16 +; fmov s7, w7 +; fcmp s0, s7 +; b.ge 8 ; udf +; movz x12, #24320, LSL #16 +; fmov s7, w12 +; fcmp s0, s7 +; b.mi 8 ; udf +; fcvtzs x0, s0 +; ret function %f37(f64) -> i32 { block0(v0: f64): @@ -588,23 +408,18 @@ block0(v0: f64): return v1 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 11) -; Inst 0: fcmp d0, d0 -; Inst 1: b.vc 8 ; udf -; Inst 2: fmov d1, #-1 -; Inst 3: fcmp d0, d1 -; Inst 4: b.gt 8 ; udf -; Inst 5: movz x0, #16880, LSL #48 -; Inst 6: fmov d1, x0 -; Inst 7: fcmp d0, d1 -; Inst 8: b.mi 8 ; udf -; Inst 9: fcvtzu w0, d0 -; Inst 10: ret -; }} +; block0: +; fcmp d0, d0 +; b.vc 8 ; udf +; fmov d6, #-1 +; fcmp d0, d6 +; b.gt 8 ; udf +; movz x10, #16880, LSL #48 +; fmov d6, x10 +; fcmp d0, d6 +; b.mi 8 ; udf +; fcvtzu w0, d0 +; ret function %f38(f64) -> i32 { block0(v0: f64): @@ -612,23 +427,18 @@ block0(v0: f64): return v1 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 11) -; Inst 0: fcmp d0, d0 -; Inst 1: b.vc 8 ; udf -; Inst 2: ldr d1, pc+8 ; b 12 ; data.f64 -2147483649 -; Inst 3: fcmp d0, d1 -; Inst 4: b.gt 8 ; udf -; Inst 5: movz x0, #16864, LSL #48 -; Inst 6: fmov d1, x0 -; Inst 7: fcmp d0, d1 -; Inst 8: b.mi 8 ; udf -; Inst 9: fcvtzs w0, d0 -; Inst 10: ret -; }} +; block0: +; fcmp d0, d0 +; b.vc 8 ; udf +; ldr d6, pc+8 ; b 12 ; data.f64 -2147483649 +; fcmp d0, d6 +; b.gt 8 ; udf +; movz x10, #16864, LSL #48 +; fmov d6, x10 +; fcmp d0, d6 +; b.mi 8 ; udf +; fcvtzs w0, d0 +; ret function %f39(f64) -> i64 { block0(v0: f64): @@ -636,23 +446,18 @@ block0(v0: f64): return v1 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 
11) -; Inst 0: fcmp d0, d0 -; Inst 1: b.vc 8 ; udf -; Inst 2: fmov d1, #-1 -; Inst 3: fcmp d0, d1 -; Inst 4: b.gt 8 ; udf -; Inst 5: movz x0, #17392, LSL #48 -; Inst 6: fmov d1, x0 -; Inst 7: fcmp d0, d1 -; Inst 8: b.mi 8 ; udf -; Inst 9: fcvtzu x0, d0 -; Inst 10: ret -; }} +; block0: +; fcmp d0, d0 +; b.vc 8 ; udf +; fmov d6, #-1 +; fcmp d0, d6 +; b.gt 8 ; udf +; movz x10, #17392, LSL #48 +; fmov d6, x10 +; fcmp d0, d6 +; b.mi 8 ; udf +; fcvtzu x0, d0 +; ret function %f40(f64) -> i64 { block0(v0: f64): @@ -660,24 +465,19 @@ block0(v0: f64): return v1 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 12) -; Inst 0: fcmp d0, d0 -; Inst 1: b.vc 8 ; udf -; Inst 2: movz x0, #50144, LSL #48 -; Inst 3: fmov d1, x0 -; Inst 4: fcmp d0, d1 -; Inst 5: b.ge 8 ; udf -; Inst 6: movz x0, #17376, LSL #48 -; Inst 7: fmov d1, x0 -; Inst 8: fcmp d0, d1 -; Inst 9: b.mi 8 ; udf -; Inst 10: fcvtzs x0, d0 -; Inst 11: ret -; }} +; block0: +; fcmp d0, d0 +; b.vc 8 ; udf +; movz x7, #50144, LSL #48 +; fmov d7, x7 +; fcmp d0, d7 +; b.ge 8 ; udf +; movz x12, #17376, LSL #48 +; fmov d7, x12 +; fcmp d0, d7 +; b.mi 8 ; udf +; fcvtzs x0, d0 +; ret function %f41(i32) -> f32 { block0(v0: i32): @@ -685,14 +485,9 @@ block0(v0: i32): return v1 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 2) -; Inst 0: ucvtf s0, w0 -; Inst 1: ret -; }} +; block0: +; ucvtf s0, w0 +; ret function %f42(i32) -> f32 { block0(v0: i32): @@ -700,14 +495,9 @@ block0(v0: i32): return v1 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 2) -; Inst 0: scvtf s0, w0 -; Inst 1: ret -; }} +; block0: +; scvtf s0, w0 +; ret function %f43(i64) -> f32 { block0(v0: i64): @@ -715,14 +505,9 @@ block0(v0: i64): return v1 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 2) -; Inst 0: ucvtf s0, x0 -; Inst 1: ret -; }} +; block0: +; ucvtf s0, x0 +; ret function %f44(i64) -> f32 { block0(v0: i64): @@ -730,14 +515,9 @@ block0(v0: i64): return v1 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 2) -; Inst 0: scvtf s0, x0 -; Inst 1: ret -; }} +; block0: +; scvtf s0, x0 +; ret function %f45(i32) -> f64 { block0(v0: i32): @@ -745,14 +525,9 @@ block0(v0: i32): return v1 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 2) -; Inst 0: ucvtf d0, w0 -; Inst 1: ret -; }} +; block0: +; ucvtf d0, w0 +; ret function %f46(i32) -> f64 { block0(v0: i32): @@ -760,14 +535,9 @@ block0(v0: i32): return v1 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 2) -; Inst 0: scvtf d0, w0 -; Inst 1: ret -; }} +; block0: +; scvtf d0, w0 +; ret function %f47(i64) -> f64 { block0(v0: i64): @@ -775,14 +545,9 @@ block0(v0: i64): return v1 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 2) -; Inst 0: ucvtf d0, x0 -; Inst 1: ret -; }} +; block0: +; ucvtf d0, x0 +; ret function %f48(i64) -> f64 { block0(v0: i64): @@ -790,14 +555,9 @@ block0(v0: i64): return v1 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 
2) -; Inst 0: scvtf d0, x0 -; Inst 1: ret -; }} +; block0: +; scvtf d0, x0 +; ret function %f49(f32) -> i32 { block0(v0: f32): @@ -805,21 +565,16 @@ block0(v0: f32): return v1 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 9) -; Inst 0: movz x0, #20352, LSL #16 -; Inst 1: fmov s1, w0 -; Inst 2: fmin s2, s0, s1 -; Inst 3: movi v1.2s, #0 -; Inst 4: fmax s2, s2, s1 -; Inst 5: fcmp s0, s0 -; Inst 6: fcsel s0, s1, s2, ne -; Inst 7: fcvtzu w0, s0 -; Inst 8: ret -; }} +; block0: +; movz x6, #20352, LSL #16 +; fmov s5, w6 +; fmin s7, s0, s5 +; movi v5.2s, #0 +; fmax s7, s7, s5 +; fcmp s0, s0 +; fcsel s7, s5, s7, ne +; fcvtzu w0, s7 +; ret function %f50(f32) -> i32 { block0(v0: f32): @@ -827,23 +582,18 @@ block0(v0: f32): return v1 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 11) -; Inst 0: movz x0, #20224, LSL #16 -; Inst 1: fmov s1, w0 -; Inst 2: fmin s1, s0, s1 -; Inst 3: movz x0, #52992, LSL #16 -; Inst 4: fmov s2, w0 -; Inst 5: fmax s1, s1, s2 -; Inst 6: movi v2.2s, #0 -; Inst 7: fcmp s0, s0 -; Inst 8: fcsel s0, s2, s1, ne -; Inst 9: fcvtzs w0, s0 -; Inst 10: ret -; }} +; block0: +; movz x6, #20224, LSL #16 +; fmov s5, w6 +; fmin s7, s0, s5 +; movz x10, #52992, LSL #16 +; fmov s5, w10 +; fmax s7, s7, s5 +; movi v5.2s, #0 +; fcmp s0, s0 +; fcsel s7, s5, s7, ne +; fcvtzs w0, s7 +; ret function %f51(f32) -> i64 { block0(v0: f32): @@ -851,21 +601,16 @@ block0(v0: f32): return v1 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 9) -; Inst 0: movz x0, #24448, LSL #16 -; Inst 1: fmov s1, w0 -; Inst 2: fmin s2, s0, s1 -; Inst 3: movi v1.2s, #0 -; Inst 4: fmax s2, s2, s1 -; Inst 5: fcmp s0, s0 -; Inst 6: fcsel s0, s1, s2, ne -; Inst 7: fcvtzu x0, s0 -; Inst 8: ret -; }} +; block0: +; movz x6, #24448, LSL #16 +; fmov s5, w6 +; fmin s7, s0, s5 +; movi v5.2s, #0 +; fmax s7, s7, s5 +; fcmp s0, s0 +; fcsel s7, s5, s7, ne +; fcvtzu x0, s7 +; ret function %f52(f32) -> i64 { block0(v0: f32): @@ -873,23 +618,18 @@ block0(v0: f32): return v1 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 11) -; Inst 0: movz x0, #24320, LSL #16 -; Inst 1: fmov s1, w0 -; Inst 2: fmin s1, s0, s1 -; Inst 3: movz x0, #57088, LSL #16 -; Inst 4: fmov s2, w0 -; Inst 5: fmax s1, s1, s2 -; Inst 6: movi v2.2s, #0 -; Inst 7: fcmp s0, s0 -; Inst 8: fcsel s0, s2, s1, ne -; Inst 9: fcvtzs x0, s0 -; Inst 10: ret -; }} +; block0: +; movz x6, #24320, LSL #16 +; fmov s5, w6 +; fmin s7, s0, s5 +; movz x10, #57088, LSL #16 +; fmov s5, w10 +; fmax s7, s7, s5 +; movi v5.2s, #0 +; fcmp s0, s0 +; fcsel s7, s5, s7, ne +; fcvtzs x0, s7 +; ret function %f53(f64) -> i32 { block0(v0: f64): @@ -897,20 +637,15 @@ block0(v0: f64): return v1 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 
8) -; Inst 0: ldr d1, pc+8 ; b 12 ; data.f64 4294967295 -; Inst 1: fmin d2, d0, d1 -; Inst 2: movi v1.2s, #0 -; Inst 3: fmax d2, d2, d1 -; Inst 4: fcmp d0, d0 -; Inst 5: fcsel d0, d1, d2, ne -; Inst 6: fcvtzu w0, d0 -; Inst 7: ret -; }} +; block0: +; ldr d4, pc+8 ; b 12 ; data.f64 4294967295 +; fmin d6, d0, d4 +; movi v4.2s, #0 +; fmax d6, d6, d4 +; fcmp d0, d0 +; fcsel d6, d4, d6, ne +; fcvtzu w0, d6 +; ret function %f54(f64) -> i32 { block0(v0: f64): @@ -918,22 +653,17 @@ block0(v0: f64): return v1 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 10) -; Inst 0: ldr d1, pc+8 ; b 12 ; data.f64 2147483647 -; Inst 1: fmin d1, d0, d1 -; Inst 2: movz x0, #49632, LSL #48 -; Inst 3: fmov d2, x0 -; Inst 4: fmax d1, d1, d2 -; Inst 5: movi v2.2s, #0 -; Inst 6: fcmp d0, d0 -; Inst 7: fcsel d0, d2, d1, ne -; Inst 8: fcvtzs w0, d0 -; Inst 9: ret -; }} +; block0: +; ldr d4, pc+8 ; b 12 ; data.f64 2147483647 +; fmin d6, d0, d4 +; movz x8, #49632, LSL #48 +; fmov d4, x8 +; fmax d6, d6, d4 +; movi v4.2s, #0 +; fcmp d0, d0 +; fcsel d6, d4, d6, ne +; fcvtzs w0, d6 +; ret function %f55(f64) -> i64 { block0(v0: f64): @@ -941,21 +671,16 @@ block0(v0: f64): return v1 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 9) -; Inst 0: movz x0, #17392, LSL #48 -; Inst 1: fmov d1, x0 -; Inst 2: fmin d2, d0, d1 -; Inst 3: movi v1.2s, #0 -; Inst 4: fmax d2, d2, d1 -; Inst 5: fcmp d0, d0 -; Inst 6: fcsel d0, d1, d2, ne -; Inst 7: fcvtzu x0, d0 -; Inst 8: ret -; }} +; block0: +; movz x6, #17392, LSL #48 +; fmov d5, x6 +; fmin d7, d0, d5 +; movi v5.2s, #0 +; fmax d7, d7, d5 +; fcmp d0, d0 +; fcsel d7, d5, d7, ne +; fcvtzu x0, d7 +; ret function %f56(f64) -> i64 { block0(v0: f64): @@ -963,21 +688,16 @@ block0(v0: f64): return v1 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 11) -; Inst 0: movz x0, #17376, LSL #48 -; Inst 1: fmov d1, x0 -; Inst 2: fmin d1, d0, d1 -; Inst 3: movz x0, #50144, LSL #48 -; Inst 4: fmov d2, x0 -; Inst 5: fmax d1, d1, d2 -; Inst 6: movi v2.2s, #0 -; Inst 7: fcmp d0, d0 -; Inst 8: fcsel d0, d2, d1, ne -; Inst 9: fcvtzs x0, d0 -; Inst 10: ret -; }} +; block0: +; movz x6, #17376, LSL #48 +; fmov d5, x6 +; fmin d7, d0, d5 +; movz x10, #50144, LSL #48 +; fmov d5, x10 +; fmax d7, d7, d5 +; movi v5.2s, #0 +; fcmp d0, d0 +; fcsel d7, d5, d7, ne +; fcvtzs x0, d7 +; ret diff --git a/cranelift/filetests/filetests/isa/aarch64/heap_addr.clif b/cranelift/filetests/filetests/isa/aarch64/heap_addr.clif index f8227627d8..d353c80e1b 100644 --- a/cranelift/filetests/filetests/isa/aarch64/heap_addr.clif +++ b/cranelift/filetests/filetests/isa/aarch64/heap_addr.clif @@ -13,31 +13,20 @@ block0(v0: i64, v1: i32): return v2 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (successor: Block 1) -; (successor: Block 2) -; (instruction range: 0 .. 5) -; Inst 0: mov w2, w1 -; Inst 1: ldr x3, [x0] -; Inst 2: mov x3, x3 -; Inst 3: subs xzr, x2, x3 -; Inst 4: b.ls label1 ; b label2 -; Block 1: -; (original IR block: block2) -; (instruction range: 5 .. 10) -; Inst 5: add x0, x0, x1, UXTW -; Inst 6: subs xzr, x2, x3 -; Inst 7: movz x1, #0 -; Inst 8: csel x0, x1, x0, hi -; Inst 9: ret -; Block 2: -; (original IR block: block1) -; (instruction range: 10 .. 
11) -; Inst 10: udf -; }} +; block0: +; mov w10, w1 +; ldr x5, [x0] +; mov x11, x5 +; subs xzr, x10, x11 +; b.ls label1 ; b label2 +; block1: +; add x13, x0, x1, UXTW +; subs xzr, x10, x11 +; movz x14, #0 +; csel x0, x14, x13, hi +; ret +; block2: +; udf function %static_heap_check(i64 vmctx, i32) -> i64 { gv0 = vmctx @@ -48,27 +37,16 @@ block0(v0: i64, v1: i32): return v2 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (successor: Block 1) -; (successor: Block 2) -; (instruction range: 0 .. 3) -; Inst 0: mov w2, w1 -; Inst 1: subs xzr, x2, #65536 -; Inst 2: b.ls label1 ; b label2 -; Block 1: -; (original IR block: block2) -; (instruction range: 3 .. 8) -; Inst 3: add x0, x0, x1, UXTW -; Inst 4: subs xzr, x2, #65536 -; Inst 5: movz x1, #0 -; Inst 6: csel x0, x1, x0, hi -; Inst 7: ret -; Block 2: -; (original IR block: block1) -; (instruction range: 8 .. 9) -; Inst 8: udf -; }} +; block0: +; mov w8, w1 +; subs xzr, x8, #65536 +; b.ls label1 ; b label2 +; block1: +; add x10, x0, x1, UXTW +; subs xzr, x8, #65536 +; movz x11, #0 +; csel x0, x11, x10, hi +; ret +; block2: +; udf diff --git a/cranelift/filetests/filetests/isa/aarch64/iconst-icmp-small.clif b/cranelift/filetests/filetests/isa/aarch64/iconst-icmp-small.clif index 2dcf741ddf..b6be2e7bcb 100644 --- a/cranelift/filetests/filetests/isa/aarch64/iconst-icmp-small.clif +++ b/cranelift/filetests/filetests/isa/aarch64/iconst-icmp-small.clif @@ -14,17 +14,12 @@ block0: return v2 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 7) -; Inst 0: movz x0, #56780 -; Inst 1: uxth w0, w0 -; Inst 2: movz x1, #56780 -; Inst 3: subs wzr, w0, w1, UXTH -; Inst 4: cset x0, ne -; Inst 5: and w0, w0, #1 -; Inst 6: ret -; }} +; block0: +; movz x3, #56780 +; uxth w5, w3 +; movz x7, #56780 +; subs wzr, w5, w7, UXTH +; cset x4, ne +; and w0, w4, #1 +; ret diff --git a/cranelift/filetests/filetests/isa/aarch64/jumptable.clif b/cranelift/filetests/filetests/isa/aarch64/jumptable.clif index f7ded6aa88..6ce538de43 100644 --- a/cranelift/filetests/filetests/isa/aarch64/jumptable.clif +++ b/cranelift/filetests/filetests/isa/aarch64/jumptable.clif @@ -1,4 +1,4 @@ -test compile +test compile precise-output set unwind_info=false target aarch64 @@ -29,16 +29,31 @@ block5(v5: i64): return v6 } -; check: subs wzr, w0, #3 -; nextln: b.hs label1 ; adr x1, pc+16 ; ldrsw x2, [x1, x0, LSL 2] ; add x1, x1, x2 ; br x1 ; jt_entries - -; check: movz x1, #1 -; nextln: b - -; check: movz x1, #2 -; nextln: b - -; check: movz x1, #3 - -; check: add x0, x0, x1 +; block0: +; emit_island 36 +; subs wzr, w0, #3 +; b.hs label1 ; adr x15, pc+16 ; ldrsw x1, [x15, x0, LSL 2] ; add x15, x15, x1 ; br x15 ; jt_entries [Label(MachLabel(3)), Label(MachLabel(5)), Label(MachLabel(7))] +; block1: +; movz x5, #4 +; b label2 +; block2: +; b label9 +; block3: +; movz x5, #1 +; b label4 +; block4: +; b label9 +; block5: +; movz x5, #2 +; b label6 +; block6: +; b label9 +; block7: +; movz x5, #3 +; b label8 +; block8: +; b label9 +; block9: +; add x0, x0, x5 +; ret diff --git a/cranelift/filetests/filetests/isa/aarch64/multivalue-ret.clif b/cranelift/filetests/filetests/isa/aarch64/multivalue-ret.clif index c4aa72309c..d6782da015 100644 --- a/cranelift/filetests/filetests/isa/aarch64/multivalue-ret.clif +++ b/cranelift/filetests/filetests/isa/aarch64/multivalue-ret.clif @@ -10,13 +10,8 @@ block1: return v0, v1 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block1) -; 
(instruction range: 0 .. 3) -; Inst 0: movz x0, #1 -; Inst 1: movz x1, #2 -; Inst 2: ret -; }} +; block0: +; movz x0, #1 +; movz x1, #2 +; ret diff --git a/cranelift/filetests/filetests/isa/aarch64/narrow-arithmetic.clif b/cranelift/filetests/filetests/isa/aarch64/narrow-arithmetic.clif index 23a3fc1d6e..30373affab 100644 --- a/cranelift/filetests/filetests/isa/aarch64/narrow-arithmetic.clif +++ b/cranelift/filetests/filetests/isa/aarch64/narrow-arithmetic.clif @@ -8,14 +8,9 @@ block0(v0: i8, v1: i8): return v2 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 2) -; Inst 0: add w0, w0, w1 -; Inst 1: ret -; }} +; block0: +; add w0, w0, w1 +; ret function %add16(i16, i16) -> i16 { block0(v0: i16, v1: i16): @@ -23,14 +18,9 @@ block0(v0: i16, v1: i16): return v2 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 2) -; Inst 0: add w0, w0, w1 -; Inst 1: ret -; }} +; block0: +; add w0, w0, w1 +; ret function %add32(i32, i32) -> i32 { block0(v0: i32, v1: i32): @@ -38,14 +28,9 @@ block0(v0: i32, v1: i32): return v2 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 2) -; Inst 0: add w0, w0, w1 -; Inst 1: ret -; }} +; block0: +; add w0, w0, w1 +; ret function %add32_8(i32, i8) -> i32 { block0(v0: i32, v1: i8): @@ -54,14 +39,9 @@ block0(v0: i32, v1: i8): return v3 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 2) -; Inst 0: add w0, w0, w1, SXTB -; Inst 1: ret -; }} +; block0: +; add w0, w0, w1, SXTB +; ret function %add64_32(i64, i32) -> i64 { block0(v0: i64, v1: i32): @@ -70,12 +50,7 @@ block0(v0: i64, v1: i32): return v3 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 2) -; Inst 0: add x0, x0, x1, SXTW -; Inst 1: ret -; }} +; block0: +; add x0, x0, x1, SXTW +; ret diff --git a/cranelift/filetests/filetests/isa/aarch64/prologue.clif b/cranelift/filetests/filetests/isa/aarch64/prologue.clif index 40934abd65..6742f1f0f1 100644 --- a/cranelift/filetests/filetests/isa/aarch64/prologue.clif +++ b/cranelift/filetests/filetests/isa/aarch64/prologue.clif @@ -1,4 +1,4 @@ -test compile +test compile precise-output set unwind_info=false target aarch64 @@ -75,19 +75,85 @@ block0(v0: f64): return v62 } -; check: stp fp, lr, [sp, #-16]! -; nextln: mov fp, sp -; nextln: stp d14, d15, [sp, #-16]! -; nextln: stp d12, d13, [sp, #-16]! -; nextln: stp d10, d11, [sp, #-16]! -; nextln: stp d8, d9, [sp, #-16]! - -; check: ldp d8, d9, [sp], #16 -; nextln: ldp d10, d11, [sp], #16 -; nextln: ldp d12, d13, [sp], #16 -; nextln: ldp d14, d15, [sp], #16 -; nextln: ldp fp, lr, [sp], #16 -; nextln: ret +; stp fp, lr, [sp, #-16]! +; mov fp, sp +; stp d14, d15, [sp, #-16]! +; stp d12, d13, [sp, #-16]! +; stp d10, d11, [sp, #-16]! +; stp d8, d9, [sp, #-16]! 
+; sub sp, sp, #16 +; block0: +; fadd d4, d0, d0 +; fadd d6, d0, d0 +; str q6, [sp] +; fadd d6, d0, d0 +; fadd d8, d0, d0 +; fadd d10, d0, d0 +; fadd d12, d0, d0 +; fadd d14, d0, d0 +; fadd d1, d0, d0 +; fadd d3, d0, d0 +; fadd d5, d0, d0 +; fadd d7, d0, d0 +; fadd d9, d0, d0 +; fadd d11, d0, d0 +; fadd d13, d0, d0 +; fadd d16, d0, d0 +; fadd d15, d0, d0 +; fadd d20, d0, d0 +; fadd d22, d0, d0 +; fadd d24, d0, d0 +; fadd d26, d0, d0 +; fadd d28, d0, d0 +; fadd d30, d0, d0 +; fadd d17, d0, d0 +; fadd d19, d0, d0 +; fadd d21, d0, d0 +; fadd d23, d0, d0 +; fadd d25, d0, d0 +; fadd d27, d0, d0 +; fadd d29, d0, d0 +; fadd d18, d0, d0 +; fadd d2, d0, d0 +; fadd d0, d0, d4 +; ldr q4, [sp] +; fadd d6, d4, d6 +; fadd d4, d8, d10 +; fadd d10, d12, d14 +; fadd d8, d1, d3 +; fadd d14, d5, d7 +; fadd d12, d9, d11 +; fadd d3, d13, d16 +; fadd d1, d15, d20 +; fadd d7, d22, d24 +; fadd d5, d26, d28 +; fadd d11, d30, d17 +; fadd d9, d19, d21 +; fadd d15, d23, d25 +; fadd d13, d27, d29 +; fadd d2, d18, d2 +; fadd d0, d0, d6 +; fadd d6, d4, d10 +; fadd d4, d8, d14 +; fadd d10, d12, d3 +; fadd d8, d1, d7 +; fadd d11, d5, d11 +; fadd d12, d9, d15 +; fadd d14, d13, d2 +; fadd d0, d0, d6 +; fadd d2, d4, d10 +; fadd d4, d8, d11 +; fadd d6, d12, d14 +; fadd d8, d0, d2 +; fadd d10, d4, d6 +; fadd d0, d8, d10 +; add sp, sp, #16 +; ldp d8, d9, [sp], #16 +; ldp d10, d11, [sp], #16 +; ldp d12, d13, [sp], #16 +; ldp d14, d15, [sp], #16 +; ldp fp, lr, [sp], #16 +; ret function %f2(i64) -> i64 { block0(v0: i64): @@ -135,14 +201,49 @@ block0(v0: i64): return v36 } -; check: stp fp, lr, [sp, #-16]! -; nextln: mov fp, sp -; nextln: str x22, [sp, #-16]! -; nextln: stp x19, x20, [sp, #-16]! -; nextln: add x1, x0, x0 +; stp fp, lr, [sp, #-16]! +; mov fp, sp +; str x28, [sp, #-16]! +; stp x19, x21, [sp, #-16]! +; block0: +; add x6, x0, x0 +; add x7, x0, x6 +; add x8, x0, x7 +; add x9, x0, x8 +; add x10, x0, x9 +; add x11, x0, x10 +; add x12, x0, x11 +; add x13, x0, x12 +; add x14, x0, x13 +; add x15, x0, x14 +; add x1, x0, x15 +; add x2, x0, x1 +; add x3, x0, x2 +; add x4, x0, x3 +; add x5, x0, x4 +; add x28, x0, x5 +; add x21, x0, x28 +; add x19, x0, x21 +; add x6, x0, x6 +; add x7, x7, x8 +; add x8, x9, x10 +; add x9, x11, x12 +; add x10, x13, x14 +; add x11, x15, x1 +; add x12, x2, x3 +; add x13, x4, x5 +; add x14, x28, x21 +; add x6, x19, x6 +; add x7, x7, x8 +; add x8, x9, x10 +; add x9, x11, x12 +; add x10, x13, x14 +; add x6, x6, x7 +; add x7, x8, x9 +; add x6, x10, x6 +; add x0, x7, x6 +; ldp x19, x21, [sp], #16 +; ldr x28, [sp], #16 +; ldp fp, lr, [sp], #16 +; ret -; check: add x0, x1, x0 -; nextln: ldp x19, x20, [sp], #16 -; nextln: ldr x22, [sp], #16 -; nextln: ldp fp, lr, [sp], #16 -; nextln: ret diff --git a/cranelift/filetests/filetests/isa/aarch64/reduce.clif b/cranelift/filetests/filetests/isa/aarch64/reduce.clif index 5d6ffa1700..9f85ce9689 100644 --- a/cranelift/filetests/filetests/isa/aarch64/reduce.clif +++ b/cranelift/filetests/filetests/isa/aarch64/reduce.clif @@ -8,13 +8,8 @@ block0(v0: i128): return v1 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 1) -; Inst 0: ret -; }} +; block0: +; ret function %ireduce_128_32(i128) -> i32 { block0(v0: i128): @@ -22,13 +17,8 @@ block0(v0: i128): return v1 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 
1) -; Inst 0: ret -; }} +; block0: +; ret function %ireduce_128_16(i128) -> i16 { block0(v0: i128): @@ -36,13 +26,8 @@ block0(v0: i128): return v1 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 1) -; Inst 0: ret -; }} +; block0: +; ret function %ireduce_128_8(i128) -> i8 { block0(v0: i128): @@ -50,11 +35,6 @@ block0(v0: i128): return v1 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 1) -; Inst 0: ret -; }} +; block0: +; ret diff --git a/cranelift/filetests/filetests/isa/aarch64/reftypes.clif b/cranelift/filetests/filetests/isa/aarch64/reftypes.clif index 4b7d855574..cd66dd952c 100644 --- a/cranelift/filetests/filetests/isa/aarch64/reftypes.clif +++ b/cranelift/filetests/filetests/isa/aarch64/reftypes.clif @@ -7,13 +7,8 @@ block0(v0: r64): return v0 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 1) -; Inst 0: ret -; }} +; block0: +; ret function %f1(r64) -> b1 { block0(v0: r64): @@ -21,15 +16,10 @@ block0(v0: r64): return v1 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 3) -; Inst 0: subs xzr, x0, #0 -; Inst 1: cset x0, eq -; Inst 2: ret -; }} +; block0: +; subs xzr, x0, #0 +; cset x0, eq +; ret function %f2(r64) -> b1 { block0(v0: r64): @@ -37,15 +27,10 @@ block0(v0: r64): return v1 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 3) -; Inst 0: adds xzr, x0, #1 -; Inst 1: cset x0, eq -; Inst 2: ret -; }} +; block0: +; adds xzr, x0, #1 +; cset x0, eq +; ret function %f3() -> r64 { block0: @@ -53,14 +38,9 @@ block0: return v0 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 2) -; Inst 0: movz x0, #0 -; Inst 1: ret -; }} +; block0: +; movz x0, #0 +; ret function %f4(r64, r64) -> r64, r64, r64 { fn0 = %f(r64) -> b1 @@ -83,63 +63,38 @@ block3(v7: r64, v8: r64): return v7, v8, v9 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (successor: Block 1) -; (successor: Block 3) -; (instruction range: 0 .. 18) -; Inst 0: stp fp, lr, [sp, #-16]! -; Inst 1: mov fp, sp -; Inst 2: stp x19, x20, [sp, #-16]! -; Inst 3: sub sp, sp, #32 -; Inst 4: mov x19, x0 -; Inst 5: mov x20, x1 -; Inst 6: mov x0, x19 -; Inst 7: ldr x1, 8 ; b 12 ; data TestCase { length: 1, ascii: [102, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0] } + 0 -; Inst 8: stur x0, [sp, #8] -; Inst 9: stur x19, [sp, #16] -; Inst 10: stur x20, [sp, #24] -; (safepoint: slots [S0, S1, S2] with EmitState EmitState { virtual_sp_offset: 0, nominal_sp_to_fp: 0, stack_map: None, cur_srcloc: SourceLoc(4294967295) }) -; Inst 11: blr x1 -; Inst 12: ldur x19, [sp, #16] -; Inst 13: ldur x20, [sp, #24] -; Inst 14: mov x1, sp -; Inst 15: str x19, [x1] -; Inst 16: and w0, w0, #1 -; Inst 17: cbz x0, label1 ; b label3 -; Block 1: -; (original IR block: block1) -; (successor: Block 2) -; (instruction range: 18 .. 19) -; Inst 18: b label2 -; Block 2: -; (successor: Block 5) -; (instruction range: 19 .. 21) -; Inst 19: mov x0, x20 -; Inst 20: b label5 -; Block 3: -; (original IR block: block2) -; (successor: Block 4) -; (instruction range: 21 .. 22) -; Inst 21: b label4 -; Block 4: -; (successor: Block 5) -; (instruction range: 22 .. 
25) -; Inst 22: mov x0, x19 -; Inst 23: mov x19, x20 -; Inst 24: b label5 -; Block 5: -; (original IR block: block3) -; (instruction range: 25 .. 33) -; Inst 25: mov x1, sp -; Inst 26: ldr x1, [x1] -; Inst 27: mov x2, x1 -; Inst 28: mov x1, x19 -; Inst 29: add sp, sp, #32 -; Inst 30: ldp x19, x20, [sp], #16 -; Inst 31: ldp fp, lr, [sp], #16 -; Inst 32: ret -; }} +; stp fp, lr, [sp, #-16]! +; mov fp, sp +; sub sp, sp, #32 +; block0: +; mov x4, x1 +; mov x2, x0 +; ldr x3, 8 ; b 12 ; data TestCase { length: 1, ascii: [102, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0] } + 0 +; str x2, [sp, #8] +; str x4, [sp, #16] +; blr x3 +; ldr x2, [sp, #8] +; mov x9, sp +; mov x12, x2 +; str x12, [x9] +; and w7, w0, #1 +; cbz x7, label1 ; b label3 +; block1: +; b label2 +; block2: +; mov x1, x12 +; ldr x0, [sp, #16] +; b label5 +; block3: +; b label4 +; block4: +; mov x0, x12 +; ldr x1, [sp, #16] +; b label5 +; block5: +; mov x4, sp +; ldr x2, [x4] +; add sp, sp, #32 +; ldp fp, lr, [sp], #16 +; ret diff --git a/cranelift/filetests/filetests/isa/aarch64/shift-op.clif b/cranelift/filetests/filetests/isa/aarch64/shift-op.clif index b2074fb658..f2400cc8df 100644 --- a/cranelift/filetests/filetests/isa/aarch64/shift-op.clif +++ b/cranelift/filetests/filetests/isa/aarch64/shift-op.clif @@ -10,14 +10,9 @@ block0(v0: i64): return v3 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 2) -; Inst 0: add x0, x0, x0, LSL 3 -; Inst 1: ret -; }} +; block0: +; add x0, x0, x0, LSL 3 +; ret function %f(i32) -> i32 { block0(v0: i32): @@ -26,12 +21,7 @@ block0(v0: i32): return v2 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 2) -; Inst 0: lsl w0, w0, #21 -; Inst 1: ret -; }} +; block0: +; lsl w0, w0, #21 +; ret diff --git a/cranelift/filetests/filetests/isa/aarch64/shift-rotate.clif b/cranelift/filetests/filetests/isa/aarch64/shift-rotate.clif index e4b602beb8..8468f76ebd 100644 --- a/cranelift/filetests/filetests/isa/aarch64/shift-rotate.clif +++ b/cranelift/filetests/filetests/isa/aarch64/shift-rotate.clif @@ -12,37 +12,30 @@ block0(v0: i128, v1: i128): return v2 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 
25) -; Inst 0: mov x4, x1 -; Inst 1: orr x1, xzr, #128 -; Inst 2: sub x1, x1, x2 -; Inst 3: lsr x5, x0, x2 -; Inst 4: lsr x3, x4, x2 -; Inst 5: orn w6, wzr, w2 -; Inst 6: lsl x7, x4, #1 -; Inst 7: lsl x6, x7, x6 -; Inst 8: orr x5, x5, x6 -; Inst 9: ands xzr, x2, #64 -; Inst 10: csel x2, x3, x5, ne -; Inst 11: csel x3, xzr, x3, ne -; Inst 12: lsl x5, x0, x1 -; Inst 13: lsl x4, x4, x1 -; Inst 14: orn w6, wzr, w1 -; Inst 15: lsr x0, x0, #1 -; Inst 16: lsr x0, x0, x6 -; Inst 17: orr x0, x4, x0 -; Inst 18: ands xzr, x1, #64 -; Inst 19: csel x1, xzr, x5, ne -; Inst 20: csel x0, x5, x0, ne -; Inst 21: orr x3, x3, x0 -; Inst 22: orr x0, x2, x1 -; Inst 23: mov x1, x3 -; Inst 24: ret -; }} +; block0: +; orr x10, xzr, #128 +; sub x12, x10, x2 +; lsr x14, x0, x2 +; lsr x3, x1, x2 +; orn w4, wzr, w2 +; lsl x5, x1, #1 +; lsl x6, x5, x4 +; orr x8, x14, x6 +; ands xzr, x2, #64 +; csel x11, x3, x8, ne +; csel x13, xzr, x3, ne +; lsl x15, x0, x12 +; lsl x1, x1, x12 +; orn w3, wzr, w12 +; lsr x5, x0, #1 +; lsr x7, x5, x3 +; orr x9, x1, x7 +; ands xzr, x12, #64 +; csel x12, xzr, x15, ne +; csel x14, x15, x9, ne +; orr x1, x13, x14 +; orr x0, x11, x12 +; ret function %f0(i64, i64) -> i64 { block0(v0: i64, v1: i64): @@ -50,14 +43,9 @@ block0(v0: i64, v1: i64): return v2 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 2) -; Inst 0: ror x0, x0, x1 -; Inst 1: ret -; }} +; block0: +; ror x0, x0, x1 +; ret function %f1(i32, i32) -> i32 { block0(v0: i32, v1: i32): @@ -65,14 +53,9 @@ block0(v0: i32, v1: i32): return v2 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 2) -; Inst 0: ror w0, w0, w1 -; Inst 1: ret -; }} +; block0: +; ror w0, w0, w1 +; ret function %f2(i16, i16) -> i16 { block0(v0: i16, v1: i16): @@ -80,20 +63,15 @@ block0(v0: i16, v1: i16): return v2 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 8) -; Inst 0: uxth w0, w0 -; Inst 1: and w1, w1, #15 -; Inst 2: sub w2, w1, #16 -; Inst 3: sub w2, wzr, w2 -; Inst 4: lsr w1, w0, w1 -; Inst 5: lsl w0, w0, w2 -; Inst 6: orr w0, w0, w1 -; Inst 7: ret -; }} +; block0: +; uxth w5, w0 +; and w7, w1, #15 +; sub w9, w7, #16 +; sub w11, wzr, w9 +; lsr w13, w5, w7 +; lsl w15, w5, w11 +; orr w0, w15, w13 +; ret function %f3(i8, i8) -> i8 { block0(v0: i8, v1: i8): @@ -101,20 +79,15 @@ block0(v0: i8, v1: i8): return v2 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 8) -; Inst 0: uxtb w0, w0 -; Inst 1: and w1, w1, #7 -; Inst 2: sub w2, w1, #8 -; Inst 3: sub w2, wzr, w2 -; Inst 4: lsr w1, w0, w1 -; Inst 5: lsl w0, w0, w2 -; Inst 6: orr w0, w0, w1 -; Inst 7: ret -; }} +; block0: +; uxtb w5, w0 +; and w7, w1, #7 +; sub w9, w7, #8 +; sub w11, wzr, w9 +; lsr w13, w5, w7 +; lsl w15, w5, w11 +; orr w0, w15, w13 +; ret function %i128_rotl(i128, i128) -> i128 { block0(v0: i128, v1: i128): @@ -122,36 +95,30 @@ block0(v0: i128, v1: i128): return v2 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 
24) -; Inst 0: mov x4, x0 -; Inst 1: orr x0, xzr, #128 -; Inst 2: sub x0, x0, x2 -; Inst 3: lsl x3, x4, x2 -; Inst 4: lsl x5, x1, x2 -; Inst 5: orn w6, wzr, w2 -; Inst 6: lsr x7, x4, #1 -; Inst 7: lsr x6, x7, x6 -; Inst 8: orr x5, x5, x6 -; Inst 9: ands xzr, x2, #64 -; Inst 10: csel x2, xzr, x3, ne -; Inst 11: csel x3, x3, x5, ne -; Inst 12: lsr x5, x4, x0 -; Inst 13: lsr x4, x1, x0 -; Inst 14: orn w6, wzr, w0 -; Inst 15: lsl x1, x1, #1 -; Inst 16: lsl x1, x1, x6 -; Inst 17: orr x1, x5, x1 -; Inst 18: ands xzr, x0, #64 -; Inst 19: csel x0, x4, x1, ne -; Inst 20: csel x1, xzr, x4, ne -; Inst 21: orr x0, x2, x0 -; Inst 22: orr x1, x3, x1 -; Inst 23: ret -; }} +; block0: +; orr x10, xzr, #128 +; sub x12, x10, x2 +; lsl x14, x0, x2 +; lsl x3, x1, x2 +; orn w4, wzr, w2 +; lsr x5, x0, #1 +; lsr x6, x5, x4 +; orr x8, x3, x6 +; ands xzr, x2, #64 +; csel x11, xzr, x14, ne +; csel x13, x14, x8, ne +; lsr x15, x0, x12 +; lsr x2, x1, x12 +; orn w3, wzr, w12 +; lsl x5, x1, #1 +; lsl x7, x5, x3 +; orr x9, x15, x7 +; ands xzr, x12, #64 +; csel x12, x2, x9, ne +; csel x14, xzr, x2, ne +; orr x0, x11, x12 +; orr x1, x13, x14 +; ret function %f4(i64, i64) -> i64 { block0(v0: i64, v1: i64): @@ -159,15 +126,10 @@ block0(v0: i64, v1: i64): return v2 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 3) -; Inst 0: sub x1, xzr, x1 -; Inst 1: ror x0, x0, x1 -; Inst 2: ret -; }} +; block0: +; sub x5, xzr, x1 +; ror x0, x0, x5 +; ret function %f5(i32, i32) -> i32 { block0(v0: i32, v1: i32): @@ -175,15 +137,10 @@ block0(v0: i32, v1: i32): return v2 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 3) -; Inst 0: sub w1, wzr, w1 -; Inst 1: ror w0, w0, w1 -; Inst 2: ret -; }} +; block0: +; sub w5, wzr, w1 +; ror w0, w0, w5 +; ret function %f6(i16, i16) -> i16 { block0(v0: i16, v1: i16): @@ -191,21 +148,16 @@ block0(v0: i16, v1: i16): return v2 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 9) -; Inst 0: sub w1, wzr, w1 -; Inst 1: uxth w0, w0 -; Inst 2: and w1, w1, #15 -; Inst 3: sub w2, w1, #16 -; Inst 4: sub w2, wzr, w2 -; Inst 5: lsr w1, w0, w1 -; Inst 6: lsl w0, w0, w2 -; Inst 7: orr w0, w0, w1 -; Inst 8: ret -; }} +; block0: +; sub w5, wzr, w1 +; uxth w7, w0 +; and w9, w5, #15 +; sub w11, w9, #16 +; sub w13, wzr, w11 +; lsr w15, w7, w9 +; lsl w1, w7, w13 +; orr w0, w1, w15 +; ret function %f7(i8, i8) -> i8 { block0(v0: i8, v1: i8): @@ -213,21 +165,16 @@ block0(v0: i8, v1: i8): return v2 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 9) -; Inst 0: sub w1, wzr, w1 -; Inst 1: uxtb w0, w0 -; Inst 2: and w1, w1, #7 -; Inst 3: sub w2, w1, #8 -; Inst 4: sub w2, wzr, w2 -; Inst 5: lsr w1, w0, w1 -; Inst 6: lsl w0, w0, w2 -; Inst 7: orr w0, w0, w1 -; Inst 8: ret -; }} +; block0: +; sub w5, wzr, w1 +; uxtb w7, w0 +; and w9, w5, #7 +; sub w11, w9, #8 +; sub w13, wzr, w11 +; lsr w15, w7, w9 +; lsl w1, w7, w13 +; orr w0, w1, w15 +; ret function %f8(i64, i64) -> i64 { block0(v0: i64, v1: i64): @@ -235,14 +182,9 @@ block0(v0: i64, v1: i64): return v2 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 
2) -; Inst 0: lsr x0, x0, x1 -; Inst 1: ret -; }} +; block0: +; lsr x0, x0, x1 +; ret function %f9(i32, i32) -> i32 { block0(v0: i32, v1: i32): @@ -250,14 +192,9 @@ block0(v0: i32, v1: i32): return v2 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 2) -; Inst 0: lsr w0, w0, w1 -; Inst 1: ret -; }} +; block0: +; lsr w0, w0, w1 +; ret function %f10(i16, i16) -> i16 { block0(v0: i16, v1: i16): @@ -265,16 +202,11 @@ block0(v0: i16, v1: i16): return v2 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 4) -; Inst 0: uxth w0, w0 -; Inst 1: and w1, w1, #15 -; Inst 2: lsr w0, w0, w1 -; Inst 3: ret -; }} +; block0: +; uxth w5, w0 +; and w7, w1, #15 +; lsr w0, w5, w7 +; ret function %f11(i8, i8) -> i8 { block0(v0: i8, v1: i8): @@ -282,16 +214,11 @@ block0(v0: i8, v1: i8): return v2 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 4) -; Inst 0: uxtb w0, w0 -; Inst 1: and w1, w1, #7 -; Inst 2: lsr w0, w0, w1 -; Inst 3: ret -; }} +; block0: +; uxtb w5, w0 +; and w7, w1, #7 +; lsr w0, w5, w7 +; ret function %f12(i64, i64) -> i64 { block0(v0: i64, v1: i64): @@ -299,14 +226,9 @@ block0(v0: i64, v1: i64): return v2 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 2) -; Inst 0: lsl x0, x0, x1 -; Inst 1: ret -; }} +; block0: +; lsl x0, x0, x1 +; ret function %f13(i32, i32) -> i32 { block0(v0: i32, v1: i32): @@ -314,14 +236,9 @@ block0(v0: i32, v1: i32): return v2 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 2) -; Inst 0: lsl w0, w0, w1 -; Inst 1: ret -; }} +; block0: +; lsl w0, w0, w1 +; ret function %f14(i16, i16) -> i16 { block0(v0: i16, v1: i16): @@ -329,15 +246,10 @@ block0(v0: i16, v1: i16): return v2 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 3) -; Inst 0: and w1, w1, #15 -; Inst 1: lsl w0, w0, w1 -; Inst 2: ret -; }} +; block0: +; and w5, w1, #15 +; lsl w0, w0, w5 +; ret function %f15(i8, i8) -> i8 { block0(v0: i8, v1: i8): @@ -345,15 +257,10 @@ block0(v0: i8, v1: i8): return v2 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 3) -; Inst 0: and w1, w1, #7 -; Inst 1: lsl w0, w0, w1 -; Inst 2: ret -; }} +; block0: +; and w5, w1, #7 +; lsl w0, w0, w5 +; ret function %f16(i64, i64) -> i64 { block0(v0: i64, v1: i64): @@ -361,14 +268,9 @@ block0(v0: i64, v1: i64): return v2 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 2) -; Inst 0: asr x0, x0, x1 -; Inst 1: ret -; }} +; block0: +; asr x0, x0, x1 +; ret function %f17(i32, i32) -> i32 { block0(v0: i32, v1: i32): @@ -376,14 +278,9 @@ block0(v0: i32, v1: i32): return v2 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 2) -; Inst 0: asr w0, w0, w1 -; Inst 1: ret -; }} +; block0: +; asr w0, w0, w1 +; ret function %f18(i16, i16) -> i16 { block0(v0: i16, v1: i16): @@ -391,16 +288,11 @@ block0(v0: i16, v1: i16): return v2 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 
4) -; Inst 0: sxth w0, w0 -; Inst 1: and w1, w1, #15 -; Inst 2: asr w0, w0, w1 -; Inst 3: ret -; }} +; block0: +; sxth w5, w0 +; and w7, w1, #15 +; asr w0, w5, w7 +; ret function %f19(i8, i8) -> i8 { block0(v0: i8, v1: i8): @@ -408,16 +300,11 @@ block0(v0: i8, v1: i8): return v2 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 4) -; Inst 0: sxtb w0, w0 -; Inst 1: and w1, w1, #7 -; Inst 2: asr w0, w0, w1 -; Inst 3: ret -; }} +; block0: +; sxtb w5, w0 +; and w7, w1, #7 +; asr w0, w5, w7 +; ret function %f20(i64) -> i64 { block0(v0: i64): @@ -426,14 +313,9 @@ block0(v0: i64): return v2 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 2) -; Inst 0: ror x0, x0, #17 -; Inst 1: ret -; }} +; block0: +; ror x0, x0, #17 +; ret function %f21(i64) -> i64 { block0(v0: i64): @@ -442,14 +324,9 @@ block0(v0: i64): return v2 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 2) -; Inst 0: ror x0, x0, #47 -; Inst 1: ret -; }} +; block0: +; ror x0, x0, #47 +; ret function %f22(i32) -> i32 { block0(v0: i32): @@ -458,14 +335,9 @@ block0(v0: i32): return v2 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 2) -; Inst 0: ror w0, w0, #15 -; Inst 1: ret -; }} +; block0: +; ror w0, w0, #15 +; ret function %f23(i16) -> i16 { block0(v0: i16): @@ -474,17 +346,12 @@ block0(v0: i16): return v2 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 5) -; Inst 0: uxth w0, w0 -; Inst 1: lsr w1, w0, #6 -; Inst 2: lsl w0, w0, #10 -; Inst 3: orr w0, w0, w1 -; Inst 4: ret -; }} +; block0: +; uxth w3, w0 +; lsr w5, w3, #6 +; lsl w7, w3, #10 +; orr w0, w7, w5 +; ret function %f24(i8) -> i8 { block0(v0: i8): @@ -493,17 +360,12 @@ block0(v0: i8): return v2 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 5) -; Inst 0: uxtb w0, w0 -; Inst 1: lsr w1, w0, #5 -; Inst 2: lsl w0, w0, #3 -; Inst 3: orr w0, w0, w1 -; Inst 4: ret -; }} +; block0: +; uxtb w3, w0 +; lsr w5, w3, #5 +; lsl w7, w3, #3 +; orr w0, w7, w5 +; ret function %f25(i64) -> i64 { block0(v0: i64): @@ -512,14 +374,9 @@ block0(v0: i64): return v2 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 2) -; Inst 0: lsr x0, x0, #17 -; Inst 1: ret -; }} +; block0: +; lsr x0, x0, #17 +; ret function %f26(i64) -> i64 { block0(v0: i64): @@ -528,14 +385,9 @@ block0(v0: i64): return v2 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 2) -; Inst 0: asr x0, x0, #17 -; Inst 1: ret -; }} +; block0: +; asr x0, x0, #17 +; ret function %f27(i64) -> i64 { block0(v0: i64): @@ -544,12 +396,7 @@ block0(v0: i64): return v2 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 
2) -; Inst 0: lsl x0, x0, #17 -; Inst 1: ret -; }} +; block0: +; lsl x0, x0, #17 +; ret diff --git a/cranelift/filetests/filetests/isa/aarch64/simd-extmul.clif b/cranelift/filetests/filetests/isa/aarch64/simd-extmul.clif index b594c60eba..532cdb82d2 100644 --- a/cranelift/filetests/filetests/isa/aarch64/simd-extmul.clif +++ b/cranelift/filetests/filetests/isa/aarch64/simd-extmul.clif @@ -10,14 +10,9 @@ block0(v0: i8x16, v1: i8x16): return v4 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 2) -; Inst 0: smull v0.8h, v0.8b, v1.8b -; Inst 1: ret -; }} +; block0: +; smull v0.8h, v0.8b, v1.8b +; ret function %fn2(i8x16, i8x16) -> i16x8 { block0(v0: i8x16, v1: i8x16): @@ -27,14 +22,9 @@ block0(v0: i8x16, v1: i8x16): return v4 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 2) -; Inst 0: smull2 v0.8h, v0.16b, v1.16b -; Inst 1: ret -; }} +; block0: +; smull2 v0.8h, v0.16b, v1.16b +; ret function %fn3(i16x8, i16x8) -> i32x4 { block0(v0: i16x8, v1: i16x8): @@ -44,14 +34,9 @@ block0(v0: i16x8, v1: i16x8): return v4 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 2) -; Inst 0: smull v0.4s, v0.4h, v1.4h -; Inst 1: ret -; }} +; block0: +; smull v0.4s, v0.4h, v1.4h +; ret function %fn4(i16x8, i16x8) -> i32x4 { block0(v0: i16x8, v1: i16x8): @@ -61,14 +46,9 @@ block0(v0: i16x8, v1: i16x8): return v4 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 2) -; Inst 0: smull2 v0.4s, v0.8h, v1.8h -; Inst 1: ret -; }} +; block0: +; smull2 v0.4s, v0.8h, v1.8h +; ret function %fn5(i32x4, i32x4) -> i64x2 { block0(v0: i32x4, v1: i32x4): @@ -78,14 +58,9 @@ block0(v0: i32x4, v1: i32x4): return v4 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 2) -; Inst 0: smull v0.2d, v0.2s, v1.2s -; Inst 1: ret -; }} +; block0: +; smull v0.2d, v0.2s, v1.2s +; ret function %fn6(i32x4, i32x4) -> i64x2 { block0(v0: i32x4, v1: i32x4): @@ -95,14 +70,9 @@ block0(v0: i32x4, v1: i32x4): return v4 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 2) -; Inst 0: smull2 v0.2d, v0.4s, v1.4s -; Inst 1: ret -; }} +; block0: +; smull2 v0.2d, v0.4s, v1.4s +; ret function %fn7(i8x16, i8x16) -> i16x8 { block0(v0: i8x16, v1: i8x16): @@ -112,14 +82,9 @@ block0(v0: i8x16, v1: i8x16): return v4 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 2) -; Inst 0: umull v0.8h, v0.8b, v1.8b -; Inst 1: ret -; }} +; block0: +; umull v0.8h, v0.8b, v1.8b +; ret function %fn8(i8x16, i8x16) -> i16x8 { block0(v0: i8x16, v1: i8x16): @@ -129,14 +94,9 @@ block0(v0: i8x16, v1: i8x16): return v4 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 2) -; Inst 0: umull2 v0.8h, v0.16b, v1.16b -; Inst 1: ret -; }} +; block0: +; umull2 v0.8h, v0.16b, v1.16b +; ret function %fn9(i16x8, i16x8) -> i32x4 { block0(v0: i16x8, v1: i16x8): @@ -146,14 +106,9 @@ block0(v0: i16x8, v1: i16x8): return v4 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 
2) -; Inst 0: umull v0.4s, v0.4h, v1.4h -; Inst 1: ret -; }} +; block0: +; umull v0.4s, v0.4h, v1.4h +; ret function %fn10(i16x8, i16x8) -> i32x4 { block0(v0: i16x8, v1: i16x8): @@ -163,14 +118,9 @@ block0(v0: i16x8, v1: i16x8): return v4 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 2) -; Inst 0: umull2 v0.4s, v0.8h, v1.8h -; Inst 1: ret -; }} +; block0: +; umull2 v0.4s, v0.8h, v1.8h +; ret function %fn11(i32x4, i32x4) -> i64x2 { block0(v0: i32x4, v1: i32x4): @@ -180,14 +130,9 @@ block0(v0: i32x4, v1: i32x4): return v4 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 2) -; Inst 0: umull v0.2d, v0.2s, v1.2s -; Inst 1: ret -; }} +; block0: +; umull v0.2d, v0.2s, v1.2s +; ret function %fn12(i32x4, i32x4) -> i64x2 { block0(v0: i32x4, v1: i32x4): @@ -197,12 +142,7 @@ block0(v0: i32x4, v1: i32x4): return v4 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 2) -; Inst 0: umull2 v0.2d, v0.4s, v1.4s -; Inst 1: ret -; }} +; block0: +; umull2 v0.2d, v0.4s, v1.4s +; ret diff --git a/cranelift/filetests/filetests/isa/aarch64/simd-pairwise-add.clif b/cranelift/filetests/filetests/isa/aarch64/simd-pairwise-add.clif index 7be0e9705c..d1d81c61f7 100644 --- a/cranelift/filetests/filetests/isa/aarch64/simd-pairwise-add.clif +++ b/cranelift/filetests/filetests/isa/aarch64/simd-pairwise-add.clif @@ -11,14 +11,9 @@ block0(v0: i8x16): return v3 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 2) -; Inst 0: saddlp v0.8h, v0.16b -; Inst 1: ret -; }} +; block0: +; saddlp v0.8h, v0.16b +; ret function %fn2(i8x16) -> i16x8 { block0(v0: i8x16): @@ -28,14 +23,9 @@ block0(v0: i8x16): return v3 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 2) -; Inst 0: uaddlp v0.8h, v0.16b -; Inst 1: ret -; }} +; block0: +; uaddlp v0.8h, v0.16b +; ret function %fn3(i16x8) -> i32x4 { block0(v0: i16x8): @@ -45,14 +35,9 @@ block0(v0: i16x8): return v3 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 2) -; Inst 0: saddlp v0.4s, v0.8h -; Inst 1: ret -; }} +; block0: +; saddlp v0.4s, v0.8h +; ret function %fn4(i16x8) -> i32x4 { block0(v0: i16x8): @@ -62,14 +47,9 @@ block0(v0: i16x8): return v3 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 2) -; Inst 0: uaddlp v0.4s, v0.8h -; Inst 1: ret -; }} +; block0: +; uaddlp v0.4s, v0.8h +; ret function %fn5(i8x16, i8x16) -> i16x8 { block0(v0: i8x16, v1: i8x16): @@ -79,16 +59,11 @@ block0(v0: i8x16, v1: i8x16): return v4 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 4) -; Inst 0: sxtl v0.8h, v0.8b -; Inst 1: sxtl2 v1.8h, v1.16b -; Inst 2: addp v0.8h, v0.8h, v1.8h -; Inst 3: ret -; }} +; block0: +; sxtl v4.8h, v0.8b +; sxtl2 v6.8h, v1.16b +; addp v0.8h, v4.8h, v6.8h +; ret function %fn6(i8x16, i8x16) -> i16x8 { block0(v0: i8x16, v1: i8x16): @@ -98,16 +73,11 @@ block0(v0: i8x16, v1: i8x16): return v4 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 
4) -; Inst 0: uxtl v0.8h, v0.8b -; Inst 1: uxtl2 v1.8h, v1.16b -; Inst 2: addp v0.8h, v0.8h, v1.8h -; Inst 3: ret -; }} +; block0: +; uxtl v4.8h, v0.8b +; uxtl2 v6.8h, v1.16b +; addp v0.8h, v4.8h, v6.8h +; ret function %fn7(i8x16) -> i16x8 { block0(v0: i8x16): @@ -117,16 +87,11 @@ block0(v0: i8x16): return v3 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 4) -; Inst 0: uxtl v1.8h, v0.8b -; Inst 1: sxtl2 v0.8h, v0.16b -; Inst 2: addp v0.8h, v1.8h, v0.8h -; Inst 3: ret -; }} +; block0: +; uxtl v2.8h, v0.8b +; sxtl2 v4.8h, v0.16b +; addp v0.8h, v2.8h, v4.8h +; ret function %fn8(i8x16) -> i16x8 { block0(v0: i8x16): @@ -136,14 +101,9 @@ block0(v0: i8x16): return v3 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 4) -; Inst 0: sxtl v1.8h, v0.8b -; Inst 1: uxtl2 v0.8h, v0.16b -; Inst 2: addp v0.8h, v1.8h, v0.8h -; Inst 3: ret -; }} +; block0: +; sxtl v2.8h, v0.8b +; uxtl2 v4.8h, v0.16b +; addp v0.8h, v2.8h, v4.8h +; ret diff --git a/cranelift/filetests/filetests/isa/aarch64/simd.clif b/cranelift/filetests/filetests/isa/aarch64/simd.clif index 9514c79a1f..166d27b80b 100644 --- a/cranelift/filetests/filetests/isa/aarch64/simd.clif +++ b/cranelift/filetests/filetests/isa/aarch64/simd.clif @@ -9,16 +9,11 @@ block0: return v1 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 4) -; Inst 0: movz x0, #1 -; Inst 1: movk x0, #1, LSL #48 -; Inst 2: dup v0.2d, x0 -; Inst 3: ret -; }} +; block0: +; movz x2, #1 +; movk x2, #1, LSL #48 +; dup v0.2d, x2 +; ret function %f2() -> i16x8 { block0: @@ -28,15 +23,10 @@ block0: return v2 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 3) -; Inst 0: movz x0, #42679 -; Inst 1: dup v0.8h, w0 -; Inst 2: ret -; }} +; block0: +; movz x2, #42679 +; dup v0.8h, w2 +; ret function %f3() -> b8x16 { block0: @@ -46,14 +36,9 @@ block0: return v2 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 2) -; Inst 0: movi v0.16b, #255 -; Inst 1: ret -; }} +; block0: +; movi v0.16b, #255 +; ret function %f4(i32, i8x16, i8x16) -> i8x16 { block0(v0: i32, v1: i8x16, v2: i8x16): @@ -61,15 +46,10 @@ block0(v0: i32, v1: i8x16, v2: i8x16): return v3 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 3) -; Inst 0: subs wzr, w0, wzr -; Inst 1: vcsel v0.16b, v0.16b, v1.16b, ne (if-then-else diamond) -; Inst 2: ret -; }} +; block0: +; subs wzr, w0, wzr +; vcsel v0.16b, v0.16b, v1.16b, ne (if-then-else diamond) +; ret function %f5(i64) -> i8x16 { block0(v0: i64): @@ -78,14 +58,9 @@ block0(v0: i64): return v2 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 2) -; Inst 0: ld1r { v0.16b }, [x0] -; Inst 1: ret -; }} +; block0: +; ld1r { v0.16b }, [x0] +; ret function %f6(i64, i64) -> i8x16, i8x16 { block0(v0: i64, v1: i64): @@ -96,15 +71,10 @@ block0(v0: i64, v1: i64): return v4, v5 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 
3) -; Inst 0: ld1r { v0.16b }, [x0] -; Inst 1: ld1r { v1.16b }, [x1] -; Inst 2: ret -; }} +; block0: +; ld1r { v0.16b }, [x0] +; ld1r { v1.16b }, [x1] +; ret function %f7(i64, i64) -> i8x16, i8x16 { block0(v0: i64, v1: i64): @@ -115,16 +85,11 @@ block0(v0: i64, v1: i64): return v4, v5 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 4) -; Inst 0: ldrb w0, [x0] -; Inst 1: ld1r { v0.16b }, [x1] -; Inst 2: dup v1.16b, w0 -; Inst 3: ret -; }} +; block0: +; ldrb w4, [x0] +; ld1r { v0.16b }, [x1] +; dup v1.16b, w4 +; ret function %f8(i64, i64) -> i8x16, i8x16 { block0(v0: i64, v1: i64): @@ -134,16 +99,11 @@ block0(v0: i64, v1: i64): return v3, v4 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 4) -; Inst 0: ldrb w0, [x0] -; Inst 1: dup v0.16b, w0 -; Inst 2: dup v1.16b, w0 -; Inst 3: ret -; }} +; block0: +; ldrb w4, [x0] +; dup v0.16b, w4 +; dup v1.16b, w4 +; ret function %f9() -> i32x2 { block0: @@ -152,15 +112,10 @@ block0: return v1 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 3) -; Inst 0: movi v0.2d, #18374687579166474495 -; Inst 1: fmov d0, d0 -; Inst 2: ret -; }} +; block0: +; movi v0.2d, #18374687579166474495 +; fmov d0, d0 +; ret function %f10() -> i32x4 { block0: @@ -169,14 +124,9 @@ block0: return v1 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 2) -; Inst 0: mvni v0.4s, #15, MSL #16 -; Inst 1: ret -; }} +; block0: +; mvni v0.4s, #15, MSL #16 +; ret function %f11() -> f32x4 { block0: @@ -185,12 +135,7 @@ block0: return v1 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 2) -; Inst 0: fmov v0.4s, #1.3125 -; Inst 1: ret -; }} +; block0: +; fmov v0.4s, #1.3125 +; ret diff --git a/cranelift/filetests/filetests/isa/aarch64/simd_load_zero.clif b/cranelift/filetests/filetests/isa/aarch64/simd_load_zero.clif index f39438c883..894ed03775 100644 --- a/cranelift/filetests/filetests/isa/aarch64/simd_load_zero.clif +++ b/cranelift/filetests/filetests/isa/aarch64/simd_load_zero.clif @@ -9,16 +9,11 @@ block0: return v1 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 4) -; Inst 0: movz x0, #1 -; Inst 1: movk x0, #1, LSL #48 -; Inst 2: fmov d0, x0 -; Inst 3: ret -; }} +; block0: +; movz x2, #1 +; movk x2, #1, LSL #48 +; fmov d0, x2 +; ret function %f2() -> i32x4 { block0: @@ -27,13 +22,8 @@ block0: return v1 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 3) -; Inst 0: movz x0, #42679 -; Inst 1: fmov s0, w0 -; Inst 2: ret -; }} +; block0: +; movz x2, #42679 +; fmov s0, w2 +; ret diff --git a/cranelift/filetests/filetests/isa/aarch64/stack-limit.clif b/cranelift/filetests/filetests/isa/aarch64/stack-limit.clif index 5ccabd8acf..993d63c3cc 100644 --- a/cranelift/filetests/filetests/isa/aarch64/stack-limit.clif +++ b/cranelift/filetests/filetests/isa/aarch64/stack-limit.clif @@ -7,26 +7,16 @@ block0: return } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 
1) -; Inst 0: ret -; }} +; block0: +; ret function %stack_limit_leaf_zero(i64 stack_limit) { block0(v0: i64): return } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 1) -; Inst 0: ret -; }} +; block0: +; ret function %stack_limit_gv_leaf_zero(i64 vmctx) { gv0 = vmctx @@ -37,13 +27,8 @@ block0(v0: i64): return } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 1) -; Inst 0: ret -; }} +; block0: +; ret function %stack_limit_call_zero(i64 stack_limit) { fn0 = %foo() @@ -52,20 +37,15 @@ block0(v0: i64): return } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 8) -; Inst 0: stp fp, lr, [sp, #-16]! -; Inst 1: mov fp, sp -; Inst 2: subs xzr, sp, x0, UXTX -; Inst 3: b.hs 8 ; udf -; Inst 4: ldr x0, 8 ; b 12 ; data TestCase { length: 3, ascii: [102, 111, 111, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0] } + 0 -; Inst 5: blr x0 -; Inst 6: ldp fp, lr, [sp], #16 -; Inst 7: ret -; }} +; stp fp, lr, [sp, #-16]! +; mov fp, sp +; subs xzr, sp, x0, UXTX +; b.hs 8 ; udf +; block0: +; ldr x2, 8 ; b 12 ; data TestCase { length: 3, ascii: [102, 111, 111, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0] } + 0 +; blr x2 +; ldp fp, lr, [sp], #16 +; ret function %stack_limit_gv_call_zero(i64 vmctx) { gv0 = vmctx @@ -78,22 +58,17 @@ block0(v0: i64): return } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 10) -; Inst 0: stp fp, lr, [sp, #-16]! -; Inst 1: mov fp, sp -; Inst 2: ldur x16, [x0] -; Inst 3: ldur x16, [x16, #4] -; Inst 4: subs xzr, sp, x16, UXTX -; Inst 5: b.hs 8 ; udf -; Inst 6: ldr x0, 8 ; b 12 ; data TestCase { length: 3, ascii: [102, 111, 111, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0] } + 0 -; Inst 7: blr x0 -; Inst 8: ldp fp, lr, [sp], #16 -; Inst 9: ret -; }} +; stp fp, lr, [sp, #-16]! +; mov fp, sp +; ldr x16, [x0] +; ldr x16, [x16, #4] +; subs xzr, sp, x16, UXTX +; b.hs 8 ; udf +; block0: +; ldr x2, 8 ; b 12 ; data TestCase { length: 3, ascii: [102, 111, 111, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0] } + 0 +; blr x2 +; ldp fp, lr, [sp], #16 +; ret function %stack_limit(i64 stack_limit) { ss0 = explicit_slot 168 @@ -101,21 +76,16 @@ block0(v0: i64): return } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 9) -; Inst 0: stp fp, lr, [sp, #-16]! -; Inst 1: mov fp, sp -; Inst 2: add x16, x0, #176 -; Inst 3: subs xzr, sp, x16, UXTX -; Inst 4: b.hs 8 ; udf -; Inst 5: sub sp, sp, #176 -; Inst 6: add sp, sp, #176 -; Inst 7: ldp fp, lr, [sp], #16 -; Inst 8: ret -; }} +; stp fp, lr, [sp, #-16]! +; mov fp, sp +; add x16, x0, #176 +; subs xzr, sp, x16, UXTX +; b.hs 8 ; udf +; sub sp, sp, #176 +; block0: +; add sp, sp, #176 +; ldp fp, lr, [sp], #16 +; ret function %huge_stack_limit(i64 stack_limit) { ss0 = explicit_slot 400000 @@ -123,29 +93,24 @@ block0(v0: i64): return } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 17) -; Inst 0: stp fp, lr, [sp, #-16]! 
-; Inst 1: mov fp, sp -; Inst 2: subs xzr, sp, x0, UXTX -; Inst 3: b.hs 8 ; udf -; Inst 4: movz w17, #6784 -; Inst 5: movk w17, #6, LSL #16 -; Inst 6: add x16, x0, x17, UXTX -; Inst 7: subs xzr, sp, x16, UXTX -; Inst 8: b.hs 8 ; udf -; Inst 9: movz w16, #6784 -; Inst 10: movk w16, #6, LSL #16 -; Inst 11: sub sp, sp, x16, UXTX -; Inst 12: movz w16, #6784 -; Inst 13: movk w16, #6, LSL #16 -; Inst 14: add sp, sp, x16, UXTX -; Inst 15: ldp fp, lr, [sp], #16 -; Inst 16: ret -; }} +; stp fp, lr, [sp, #-16]! +; mov fp, sp +; subs xzr, sp, x0, UXTX +; b.hs 8 ; udf +; movz w17, #6784 +; movk w17, #6, LSL #16 +; add x16, x0, x17, UXTX +; subs xzr, sp, x16, UXTX +; b.hs 8 ; udf +; movz w16, #6784 +; movk w16, #6, LSL #16 +; sub sp, sp, x16, UXTX +; block0: +; movz w16, #6784 +; movk w16, #6, LSL #16 +; add sp, sp, x16, UXTX +; ldp fp, lr, [sp], #16 +; ret function %limit_preamble(i64 vmctx) { gv0 = vmctx @@ -157,23 +122,18 @@ block0(v0: i64): return } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 11) -; Inst 0: stp fp, lr, [sp, #-16]! -; Inst 1: mov fp, sp -; Inst 2: ldur x16, [x0] -; Inst 3: ldur x16, [x16, #4] -; Inst 4: add x16, x16, #32 -; Inst 5: subs xzr, sp, x16, UXTX -; Inst 6: b.hs 8 ; udf -; Inst 7: sub sp, sp, #32 -; Inst 8: add sp, sp, #32 -; Inst 9: ldp fp, lr, [sp], #16 -; Inst 10: ret -; }} +; stp fp, lr, [sp, #-16]! +; mov fp, sp +; ldr x16, [x0] +; ldr x16, [x16, #4] +; add x16, x16, #32 +; subs xzr, sp, x16, UXTX +; b.hs 8 ; udf +; sub sp, sp, #32 +; block0: +; add sp, sp, #32 +; ldp fp, lr, [sp], #16 +; ret function %limit_preamble_huge(i64 vmctx) { gv0 = vmctx @@ -185,31 +145,26 @@ block0(v0: i64): return } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 19) -; Inst 0: stp fp, lr, [sp, #-16]! -; Inst 1: mov fp, sp -; Inst 2: ldur x16, [x0] -; Inst 3: ldur x16, [x16, #4] -; Inst 4: subs xzr, sp, x16, UXTX -; Inst 5: b.hs 8 ; udf -; Inst 6: movz w17, #6784 -; Inst 7: movk w17, #6, LSL #16 -; Inst 8: add x16, x16, x17, UXTX -; Inst 9: subs xzr, sp, x16, UXTX -; Inst 10: b.hs 8 ; udf -; Inst 11: movz w16, #6784 -; Inst 12: movk w16, #6, LSL #16 -; Inst 13: sub sp, sp, x16, UXTX -; Inst 14: movz w16, #6784 -; Inst 15: movk w16, #6, LSL #16 -; Inst 16: add sp, sp, x16, UXTX -; Inst 17: ldp fp, lr, [sp], #16 -; Inst 18: ret -; }} +; stp fp, lr, [sp, #-16]! +; mov fp, sp +; ldr x16, [x0] +; ldr x16, [x16, #4] +; subs xzr, sp, x16, UXTX +; b.hs 8 ; udf +; movz w17, #6784 +; movk w17, #6, LSL #16 +; add x16, x16, x17, UXTX +; subs xzr, sp, x16, UXTX +; b.hs 8 ; udf +; movz w16, #6784 +; movk w16, #6, LSL #16 +; sub sp, sp, x16, UXTX +; block0: +; movz w16, #6784 +; movk w16, #6, LSL #16 +; add sp, sp, x16, UXTX +; ldp fp, lr, [sp], #16 +; ret function %limit_preamble_huge_offset(i64 vmctx) { gv0 = vmctx @@ -220,20 +175,15 @@ block0(v0: i64): return } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 10) -; Inst 0: stp fp, lr, [sp, #-16]! -; Inst 1: mov fp, sp -; Inst 2: movz w16, #6784 ; movk w16, #6, LSL #16 ; add x16, x0, x16, UXTX ; ldr x16, [x16] -; Inst 3: add x16, x16, #32 -; Inst 4: subs xzr, sp, x16, UXTX -; Inst 5: b.hs 8 ; udf -; Inst 6: sub sp, sp, #32 -; Inst 7: add sp, sp, #32 -; Inst 8: ldp fp, lr, [sp], #16 -; Inst 9: ret -; }} +; stp fp, lr, [sp, #-16]! 
+; mov fp, sp +; movz w16, #6784 ; movk w16, #6, LSL #16 ; add x16, x0, x16, UXTX ; ldr x16, [x16] +; add x16, x16, #32 +; subs xzr, sp, x16, UXTX +; b.hs 8 ; udf +; sub sp, sp, #32 +; block0: +; add sp, sp, #32 +; ldp fp, lr, [sp], #16 +; ret diff --git a/cranelift/filetests/filetests/isa/aarch64/stack.clif b/cranelift/filetests/filetests/isa/aarch64/stack.clif index a816eeb4d6..a5ebd29a9e 100644 --- a/cranelift/filetests/filetests/isa/aarch64/stack.clif +++ b/cranelift/filetests/filetests/isa/aarch64/stack.clif @@ -1,4 +1,4 @@ -test compile +test compile precise-output set unwind_info=false target aarch64 @@ -10,14 +10,14 @@ block0: return v0 } -; check: stp fp, lr, [sp, #-16]! -; nextln: mov fp, sp -; nextln: sub sp, sp, #16 -; nextln: mov x0, sp -; nextln: add sp, sp, #16 -; nextln: ldp fp, lr, [sp], #16 -; nextln: ret - +; stp fp, lr, [sp, #-16]! +; mov fp, sp +; sub sp, sp, #16 +; block0: +; mov x0, sp +; add sp, sp, #16 +; ldp fp, lr, [sp], #16 +; ret function %stack_addr_big() -> i64 { ss0 = explicit_slot 100000 @@ -28,20 +28,18 @@ block0: return v0 } -; check: stp fp, lr, [sp, #-16]! -; nextln: mov fp, sp -; nextln: movz w16, #34480 -; nextln: movk w16, #1, LSL #16 -; nextln: sub sp, sp, x16, UXTX -; nextln: mov x0, sp -; nextln: movz w16, #34480 -; nextln: movk w16, #1, LSL #16 -; nextln: add sp, sp, x16, UXTX -; nextln: ldp fp, lr, [sp], #16 -; nextln: ret - - -; FIXME: don't use stack_addr legalization for stack_load and stack_store +; stp fp, lr, [sp, #-16]! +; mov fp, sp +; movz w16, #34480 +; movk w16, #1, LSL #16 +; sub sp, sp, x16, UXTX +; block0: +; mov x0, sp +; movz w16, #34480 +; movk w16, #1, LSL #16 +; add sp, sp, x16, UXTX +; ldp fp, lr, [sp], #16 +; ret function %stack_load_small() -> i64 { ss0 = explicit_slot 8 @@ -51,15 +49,15 @@ block0: return v0 } -; check: stp fp, lr, [sp, #-16]! -; nextln: mov fp, sp -; nextln: sub sp, sp, #16 -; nextln: mov x0, sp -; nextln: ldr x0, [x0] -; nextln: add sp, sp, #16 -; nextln: ldp fp, lr, [sp], #16 -; nextln: ret - +; stp fp, lr, [sp, #-16]! +; mov fp, sp +; sub sp, sp, #16 +; block0: +; mov x0, sp +; ldr x0, [x0] +; add sp, sp, #16 +; ldp fp, lr, [sp], #16 +; ret function %stack_load_big() -> i64 { ss0 = explicit_slot 100000 @@ -70,19 +68,19 @@ block0: return v0 } -; check: stp fp, lr, [sp, #-16]! -; nextln: mov fp, sp -; nextln: movz w16, #34480 -; nextln: movk w16, #1, LSL #16 -; nextln: sub sp, sp, x16, UXTX -; nextln: mov x0, sp -; nextln: ldr x0, [x0] -; nextln: movz w16, #34480 -; nextln: movk w16, #1, LSL #16 -; nextln: add sp, sp, x16, UXTX -; nextln: ldp fp, lr, [sp], #16 -; nextln: ret - +; stp fp, lr, [sp, #-16]! +; mov fp, sp +; movz w16, #34480 +; movk w16, #1, LSL #16 +; sub sp, sp, x16, UXTX +; block0: +; mov x0, sp +; ldr x0, [x0] +; movz w16, #34480 +; movk w16, #1, LSL #16 +; add sp, sp, x16, UXTX +; ldp fp, lr, [sp], #16 +; ret function %stack_store_small(i64) { ss0 = explicit_slot 8 @@ -92,15 +90,15 @@ block0(v0: i64): return } -; check: stp fp, lr, [sp, #-16]! -; nextln: mov fp, sp -; nextln: sub sp, sp, #16 -; nextln: mov x1, sp -; nextln: str x0, [x1] -; nextln: add sp, sp, #16 -; nextln: ldp fp, lr, [sp], #16 -; nextln: ret - +; stp fp, lr, [sp, #-16]! +; mov fp, sp +; sub sp, sp, #16 +; block0: +; mov x2, sp +; str x0, [x2] +; add sp, sp, #16 +; ldp fp, lr, [sp], #16 +; ret function %stack_store_big(i64) { ss0 = explicit_slot 100000 @@ -111,21 +109,20 @@ block0(v0: i64): return } -; check: stp fp, lr, [sp, #-16]! 
-; nextln: mov fp, sp -; nextln: movz w16, #34480 -; nextln: movk w16, #1, LSL #16 -; nextln: sub sp, sp, x16, UXTX -; nextln: mov x1, sp -; nextln: str x0, [x1] -; nextln: movz w16, #34480 -; nextln: movk w16, #1, LSL #16 -; nextln: add sp, sp, x16, UXTX -; nextln: ldp fp, lr, [sp], #16 -; nextln: ret +; stp fp, lr, [sp, #-16]! +; mov fp, sp +; movz w16, #34480 +; movk w16, #1, LSL #16 +; sub sp, sp, x16, UXTX +; block0: +; mov x2, sp +; str x0, [x2] +; movz w16, #34480 +; movk w16, #1, LSL #16 +; add sp, sp, x16, UXTX +; ldp fp, lr, [sp], #16 +; ret -; Force a b1 to be spilled into a slot at an SP offset between 0x100 and -; 0x1fff, to exercise the scaled addressing mode. function %b1_spill_slot(b1) -> b1, i64 { ss0 = explicit_slot 1000 @@ -277,7 +274,161 @@ block0(v0: b1): return v0, v137 } - +; stp fp, lr, [sp, #-16]! +; mov fp, sp +; stp x27, x28, [sp, #-16]! +; stp x25, x26, [sp, #-16]! +; stp x23, x24, [sp, #-16]! +; stp x21, x22, [sp, #-16]! +; stp x19, x20, [sp, #-16]! +; sub sp, sp, #1152 +; block0: +; str x0, [sp, #1000] +; movz x8, #2 +; add x11, x8, #1 +; str x11, [sp, #1136] +; movz x8, #4 +; add x12, x8, #3 +; str x12, [sp, #1128] +; movz x8, #6 +; add x13, x8, #5 +; str x13, [sp, #1120] +; movz x8, #8 +; add x14, x8, #7 +; str x14, [sp, #1112] +; movz x8, #10 +; add x15, x8, #9 +; str x15, [sp, #1104] +; movz x8, #12 +; add x1, x8, #11 +; str x1, [sp, #1096] +; movz x8, #14 +; add x2, x8, #13 +; str x2, [sp, #1088] +; movz x8, #16 +; add x3, x8, #15 +; str x3, [sp, #1080] +; movz x8, #18 +; add x4, x8, #17 +; str x4, [sp, #1072] +; movz x8, #20 +; add x5, x8, #19 +; str x5, [sp, #1064] +; movz x8, #22 +; add x6, x8, #21 +; str x6, [sp, #1056] +; movz x8, #24 +; add x7, x8, #23 +; str x7, [sp, #1048] +; movz x8, #26 +; add x8, x8, #25 +; str x8, [sp, #1040] +; movz x8, #28 +; add x9, x8, #27 +; str x9, [sp, #1032] +; movz x8, #30 +; add x26, x8, #29 +; str x26, [sp, #1024] +; movz x8, #32 +; add x27, x8, #31 +; str x27, [sp, #1016] +; movz x8, #34 +; add x28, x8, #33 +; movz x8, #36 +; add x21, x8, #35 +; str x21, [sp, #1008] +; movz x8, #38 +; add x21, x8, #37 +; movz x8, #30 +; add x19, x8, #39 +; movz x8, #32 +; add x20, x8, #31 +; movz x8, #34 +; add x22, x8, #33 +; movz x8, #36 +; add x23, x8, #35 +; movz x8, #38 +; add x24, x8, #37 +; movz x8, #30 +; add x25, x8, #39 +; movz x8, #32 +; add x0, x8, #31 +; movz x8, #34 +; add x10, x8, #33 +; movz x8, #36 +; add x11, x8, #35 +; movz x8, #38 +; add x12, x8, #37 +; movz x8, #30 +; add x13, x8, #39 +; movz x8, #32 +; add x14, x8, #31 +; movz x8, #34 +; add x15, x8, #33 +; movz x8, #36 +; add x1, x8, #35 +; movz x8, #38 +; add x2, x8, #37 +; ldr x3, [sp, #1136] +; add x3, x3, #39 +; ldr x5, [sp, #1120] +; ldr x4, [sp, #1128] +; add x4, x4, x5 +; ldr x5, [sp, #1104] +; ldr x8, [sp, #1112] +; add x5, x8, x5 +; ldr x6, [sp, #1088] +; ldr x7, [sp, #1096] +; add x6, x7, x6 +; ldr x7, [sp, #1072] +; ldr x8, [sp, #1080] +; add x7, x8, x7 +; ldr x9, [sp, #1056] +; ldr x8, [sp, #1064] +; add x8, x8, x9 +; ldr x9, [sp, #1040] +; ldr x26, [sp, #1048] +; add x9, x26, x9 +; ldr x26, [sp, #1024] +; ldr x27, [sp, #1032] +; add x26, x27, x26 +; ldr x27, [sp, #1016] +; add x27, x27, x28 +; ldr x28, [sp, #1008] +; add x28, x28, x21 +; add x21, x19, x20 +; add x19, x22, x23 +; add x25, x24, x25 +; add x10, x0, x10 +; add x11, x11, x12 +; add x12, x13, x14 +; add x13, x15, x1 +; add x14, x2, x3 +; add x15, x4, x5 +; add x0, x6, x7 +; add x9, x8, x9 +; add x1, x26, x27 +; add x2, x28, x21 +; add x3, x19, x25 +; add x10, x10, x11 +; add x11, x12, 
x13 +; add x12, x14, x15 +; add x9, x0, x9 +; add x13, x1, x2 +; add x10, x3, x10 +; add x11, x11, x12 +; add x9, x9, x13 +; add x10, x10, x11 +; add x1, x9, x10 +; ldr x0, [sp, #1000] +; add sp, sp, #1152 +; ldp x19, x20, [sp], #16 +; ldp x21, x22, [sp], #16 +; ldp x23, x24, [sp], #16 +; ldp x25, x26, [sp], #16 +; ldp x27, x28, [sp], #16 +; ldp fp, lr, [sp], #16 +; ret function %i128_stack_store(i128) { ss0 = explicit_slot 16 @@ -286,15 +437,16 @@ block0(v0: i128): stack_store.i128 v0, ss0 return } -; TODO: Codegen improvement opportunities: This should be just a stp x0, x1, [sp, #-16] -; check: stp fp, lr, [sp, #-16]! -; nextln: mov fp, sp -; nextln: sub sp, sp, #16 -; nextln: mov x2, sp -; nextln: stp x0, x1, [x2] -; nextln: add sp, sp, #16 -; nextln: ldp fp, lr, [sp], #16 +; stp fp, lr, [sp, #-16]! +; mov fp, sp +; sub sp, sp, #16 +; block0: +; mov x4, sp +; stp x0, x1, [x4] +; add sp, sp, #16 +; ldp fp, lr, [sp], #16 +; ret function %i128_stack_store_inst_offset(i128) { ss0 = explicit_slot 16 @@ -304,15 +456,16 @@ block0(v0: i128): stack_store.i128 v0, ss1+16 return } -; check: stp fp, lr, [sp, #-16]! -; nextln: mov fp, sp -; nextln: sub sp, sp, #32 -; nextln: add x2, sp, #32 -; nextln: stp x0, x1, [x2] -; nextln: add sp, sp, #32 -; nextln: ldp fp, lr, [sp], #16 -; nextln: ret +; stp fp, lr, [sp, #-16]! +; mov fp, sp +; sub sp, sp, #32 +; block0: +; add x4, sp, #32 +; stp x0, x1, [x4] +; add sp, sp, #32 +; ldp fp, lr, [sp], #16 +; ret function %i128_stack_store_big(i128) { ss0 = explicit_slot 100000 @@ -322,20 +475,20 @@ block0(v0: i128): stack_store.i128 v0, ss0 return } -; check: stp fp, lr, [sp, #-16]! -; nextln: mov fp, sp -; nextln: movz w16, #34480 -; nextln: movk w16, #1, LSL #16 -; nextln: sub sp, sp, x16, UXTX -; nextln: mov x2, sp -; nextln: stp x0, x1, [x2] -; nextln: movz w16, #34480 -; nextln: movk w16, #1, LSL #16 -; nextln: add sp, sp, x16, UXTX -; nextln: ldp fp, lr, [sp], #16 -; nextln: ret - +; stp fp, lr, [sp, #-16]! +; mov fp, sp +; movz w16, #34480 +; movk w16, #1, LSL #16 +; sub sp, sp, x16, UXTX +; block0: +; mov x4, sp +; stp x0, x1, [x4] +; movz w16, #34480 +; movk w16, #1, LSL #16 +; add sp, sp, x16, UXTX +; ldp fp, lr, [sp], #16 +; ret function %i128_stack_load() -> i128 { ss0 = explicit_slot 16 @@ -344,20 +497,16 @@ block0: v0 = stack_load.i128 ss0 return v0 } -; TODO: Codegen improvement opportunities: This should be just a ldp x0, x1, [sp, #-16] -; check: stp fp, lr, [sp, #-16]! -; nextln: mov fp, sp -; nextln: sub sp, sp, #16 -; nextln: mov x0, sp -; nextln: ldp x1, x0, [x0] -; nextln: mov x2, x0 -; nextln: mov x0, x1 -; nextln: mov x1, x2 -; nextln: add sp, sp, #16 -; nextln: ldp fp, lr, [sp], #16 -; nextln: ret - +; stp fp, lr, [sp, #-16]! +; mov fp, sp +; sub sp, sp, #16 +; block0: +; mov x0, sp +; ldp x0, x1, [x0] +; add sp, sp, #16 +; ldp fp, lr, [sp], #16 +; ret function %i128_stack_load_inst_offset() -> i128 { ss0 = explicit_slot 16 @@ -367,19 +516,16 @@ block0: v0 = stack_load.i128 ss1+16 return v0 } -; check: stp fp, lr, [sp, #-16]! -; nextln: mov fp, sp -; nextln: sub sp, sp, #32 -; nextln: add x0, sp, #32 -; nextln: ldp x1, x0, [x0] -; nextln: mov x2, x0 -; nextln: mov x0, x1 -; nextln: mov x1, x2 -; nextln: add sp, sp, #32 -; nextln: ldp fp, lr, [sp], #16 -; nextln: ret - +; stp fp, lr, [sp, #-16]! 
+; mov fp, sp +; sub sp, sp, #32 +; block0: +; add x0, sp, #32 +; ldp x0, x1, [x0] +; add sp, sp, #32 +; ldp fp, lr, [sp], #16 +; ret function %i128_stack_load_big() -> i128 { ss0 = explicit_slot 100000 @@ -389,18 +535,18 @@ block0: v0 = stack_load.i128 ss0 return v0 } -; check: stp fp, lr, [sp, #-16]! -; nextln: mov fp, sp -; nextln: movz w16, #34480 -; nextln: movk w16, #1, LSL #16 -; nextln: sub sp, sp, x16, UXTX -; nextln: mov x0, sp -; nextln: ldp x1, x0, [x0] -; nextln: mov x2, x0 -; nextln: mov x0, x1 -; nextln: mov x1, x2 -; nextln: movz w16, #34480 -; nextln: movk w16, #1, LSL #16 -; nextln: add sp, sp, x16, UXTX -; nextln: ldp fp, lr, [sp], #16 -; nextln: ret + +; stp fp, lr, [sp, #-16]! +; mov fp, sp +; movz w16, #34480 +; movk w16, #1, LSL #16 +; sub sp, sp, x16, UXTX +; block0: +; mov x0, sp +; ldp x0, x1, [x0] +; movz w16, #34480 +; movk w16, #1, LSL #16 +; add sp, sp, x16, UXTX +; ldp fp, lr, [sp], #16 +; ret + diff --git a/cranelift/filetests/filetests/isa/aarch64/symbol-value.clif b/cranelift/filetests/filetests/isa/aarch64/symbol-value.clif index c1c695d9ae..b9eecef1bd 100644 --- a/cranelift/filetests/filetests/isa/aarch64/symbol-value.clif +++ b/cranelift/filetests/filetests/isa/aarch64/symbol-value.clif @@ -10,12 +10,7 @@ block0: return v0 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 2) -; Inst 0: ldr x0, 8 ; b 12 ; data TestCase { length: 9, ascii: [109, 121, 95, 103, 108, 111, 98, 97, 108, 0, 0, 0, 0, 0, 0, 0] } + 0 -; Inst 1: ret -; }} +; block0: +; ldr x0, 8 ; b 12 ; data TestCase { length: 9, ascii: [109, 121, 95, 103, 108, 111, 98, 97, 108, 0, 0, 0, 0, 0, 0, 0] } + 0 +; ret diff --git a/cranelift/filetests/filetests/isa/aarch64/tls-elf-gd.clif b/cranelift/filetests/filetests/isa/aarch64/tls-elf-gd.clif index fa81e520db..7cc42b191b 100644 --- a/cranelift/filetests/filetests/isa/aarch64/tls-elf-gd.clif +++ b/cranelift/filetests/filetests/isa/aarch64/tls-elf-gd.clif @@ -10,28 +10,23 @@ block0(v0: i32): return v0, v1 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 18) -; Inst 0: stp fp, lr, [sp, #-16]! -; Inst 1: mov fp, sp -; Inst 2: str x19, [sp, #-16]! -; Inst 3: stp d14, d15, [sp, #-16]! -; Inst 4: stp d12, d13, [sp, #-16]! -; Inst 5: stp d10, d11, [sp, #-16]! -; Inst 6: stp d8, d9, [sp, #-16]! -; Inst 7: mov x19, x0 -; Inst 8: elf_tls_get_addr u1:0 -; Inst 9: mov x1, x0 -; Inst 10: mov x0, x19 -; Inst 11: ldp d8, d9, [sp], #16 -; Inst 12: ldp d10, d11, [sp], #16 -; Inst 13: ldp d12, d13, [sp], #16 -; Inst 14: ldp d14, d15, [sp], #16 -; Inst 15: ldr x19, [sp], #16 -; Inst 16: ldp fp, lr, [sp], #16 -; Inst 17: ret -; }} +; stp fp, lr, [sp, #-16]! +; mov fp, sp +; str x25, [sp, #-16]! +; stp d14, d15, [sp, #-16]! +; stp d12, d13, [sp, #-16]! +; stp d10, d11, [sp, #-16]! +; stp d8, d9, [sp, #-16]! 
+; block0: +; mov x25, x0 +; elf_tls_get_addr u1:0 +; mov x1, x0 +; mov x0, x25 +; ldp d8, d9, [sp], #16 +; ldp d10, d11, [sp], #16 +; ldp d12, d13, [sp], #16 +; ldp d14, d15, [sp], #16 +; ldr x25, [sp], #16 +; ldp fp, lr, [sp], #16 +; ret diff --git a/cranelift/filetests/filetests/isa/aarch64/traps.clif b/cranelift/filetests/filetests/isa/aarch64/traps.clif index 34c59f4678..e01b3e77cb 100644 --- a/cranelift/filetests/filetests/isa/aarch64/traps.clif +++ b/cranelift/filetests/filetests/isa/aarch64/traps.clif @@ -7,13 +7,8 @@ block0: trap user0 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 1) -; Inst 0: udf -; }} +; block0: +; udf function %g(i64) { block0(v0: i64): @@ -23,15 +18,10 @@ block0(v0: i64): return } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 3) -; Inst 0: subs xzr, x0, #42 -; Inst 1: b.ne 8 ; udf -; Inst 2: ret -; }} +; block0: +; subs xzr, x0, #42 +; b.ne 8 ; udf +; ret function %h() { block0: @@ -39,12 +29,7 @@ block0: return } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 2) -; Inst 0: brk #0 -; Inst 1: ret -; }} +; block0: +; brk #0 +; ret diff --git a/cranelift/filetests/filetests/isa/aarch64/uextend-sextend.clif b/cranelift/filetests/filetests/isa/aarch64/uextend-sextend.clif index 46b08b60b5..a13f20c555 100644 --- a/cranelift/filetests/filetests/isa/aarch64/uextend-sextend.clif +++ b/cranelift/filetests/filetests/isa/aarch64/uextend-sextend.clif @@ -8,14 +8,9 @@ block0(v0: i8): return v1 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 2) -; Inst 0: uxtb w0, w0 -; Inst 1: ret -; }} +; block0: +; uxtb w0, w0 +; ret function %f_u_8_32(i8) -> i32 { block0(v0: i8): @@ -23,14 +18,9 @@ block0(v0: i8): return v1 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 2) -; Inst 0: uxtb w0, w0 -; Inst 1: ret -; }} +; block0: +; uxtb w0, w0 +; ret function %f_u_8_16(i8) -> i16 { block0(v0: i8): @@ -38,14 +28,9 @@ block0(v0: i8): return v1 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 2) -; Inst 0: uxtb w0, w0 -; Inst 1: ret -; }} +; block0: +; uxtb w0, w0 +; ret function %f_s_8_64(i8) -> i64 { block0(v0: i8): @@ -53,14 +38,9 @@ block0(v0: i8): return v1 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 2) -; Inst 0: sxtb x0, w0 -; Inst 1: ret -; }} +; block0: +; sxtb x0, w0 +; ret function %f_s_8_32(i8) -> i32 { block0(v0: i8): @@ -68,14 +48,9 @@ block0(v0: i8): return v1 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 2) -; Inst 0: sxtb w0, w0 -; Inst 1: ret -; }} +; block0: +; sxtb w0, w0 +; ret function %f_s_8_16(i8) -> i16 { block0(v0: i8): @@ -83,14 +58,9 @@ block0(v0: i8): return v1 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 2) -; Inst 0: sxtb w0, w0 -; Inst 1: ret -; }} +; block0: +; sxtb w0, w0 +; ret function %f_u_16_64(i16) -> i64 { block0(v0: i16): @@ -98,14 +68,9 @@ block0(v0: i16): return v1 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 
2) -; Inst 0: uxth w0, w0 -; Inst 1: ret -; }} +; block0: +; uxth w0, w0 +; ret function %f_u_16_32(i16) -> i32 { block0(v0: i16): @@ -113,14 +78,9 @@ block0(v0: i16): return v1 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 2) -; Inst 0: uxth w0, w0 -; Inst 1: ret -; }} +; block0: +; uxth w0, w0 +; ret function %f_s_16_64(i16) -> i64 { block0(v0: i16): @@ -128,14 +88,9 @@ block0(v0: i16): return v1 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 2) -; Inst 0: sxth x0, w0 -; Inst 1: ret -; }} +; block0: +; sxth x0, w0 +; ret function %f_s_16_32(i16) -> i32 { block0(v0: i16): @@ -143,14 +98,9 @@ block0(v0: i16): return v1 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 2) -; Inst 0: sxth w0, w0 -; Inst 1: ret -; }} +; block0: +; sxth w0, w0 +; ret function %f_u_32_64(i32) -> i64 { block0(v0: i32): @@ -158,14 +108,9 @@ block0(v0: i32): return v1 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 2) -; Inst 0: mov w0, w0 -; Inst 1: ret -; }} +; block0: +; mov w0, w0 +; ret function %f_s_32_64(i32) -> i64 { block0(v0: i32): @@ -173,12 +118,7 @@ block0(v0: i32): return v1 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 2) -; Inst 0: sxtw x0, w0 -; Inst 1: ret -; }} +; block0: +; sxtw x0, w0 +; ret diff --git a/cranelift/filetests/filetests/isa/s390x/arithmetic.clif b/cranelift/filetests/filetests/isa/s390x/arithmetic.clif index 98883f6ae7..945e251371 100644 --- a/cranelift/filetests/filetests/isa/s390x/arithmetic.clif +++ b/cranelift/filetests/filetests/isa/s390x/arithmetic.clif @@ -1,4 +1,4 @@ -test compile +test compile precise-output target s390x ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; @@ -11,8 +11,9 @@ block0(v0: i64, v1: i64): return v2 } -; check: agr %r2, %r3 -; nextln: br %r14 +; block0: +; agr %r2, %r3 +; br %r14 function %iadd_i64_ext32(i64, i32) -> i64 { block0(v0: i64, v1: i32): @@ -21,8 +22,9 @@ block0(v0: i64, v1: i32): return v3 } -; check: agfr %r2, %r3 -; nextln: br %r14 +; block0: +; agfr %r2, %r3 +; br %r14 function %iadd_i64_imm16(i64) -> i64 { block0(v0: i64): @@ -31,8 +33,9 @@ block0(v0: i64): return v2 } -; check: aghi %r2, 1 -; nextln: br %r14 +; block0: +; aghi %r2, 1 +; br %r14 function %iadd_i64_imm32(i64) -> i64 { block0(v0: i64): @@ -41,8 +44,9 @@ block0(v0: i64): return v2 } -; check: agfi %r2, 32768 -; nextln: br %r14 +; block0: +; agfi %r2, 32768 +; br %r14 function %iadd_i64_mem(i64, i64) -> i64 { block0(v0: i64, v1: i64): @@ -51,8 +55,9 @@ block0(v0: i64, v1: i64): return v3 } -; check: ag %r2, 0(%r3) -; nextln: br %r14 +; block0: +; ag %r2, 0(%r3) +; br %r14 function %iadd_i64_mem_ext16(i64, i64) -> i64 { block0(v0: i64, v1: i64): @@ -61,8 +66,9 @@ block0(v0: i64, v1: i64): return v3 } -; check: agh %r2, 0(%r3) -; nextln: br %r14 +; block0: +; agh %r2, 0(%r3) +; br %r14 function %iadd_i64_mem_ext32(i64, i64) -> i64 { block0(v0: i64, v1: i64): @@ -71,8 +77,9 @@ block0(v0: i64, v1: i64): return v3 } -; check: agf %r2, 0(%r3) -; nextln: br %r14 +; block0: +; agf %r2, 0(%r3) +; br %r14 function %iadd_i32(i32, i32) -> i32 { block0(v0: i32, v1: i32): @@ -80,8 +87,9 @@ block0(v0: i32, v1: i32): return v2 } -; check: ar %r2, %r3 -; nextln: br %r14 +; block0: +; ar %r2, %r3 +; br %r14 function %iadd_i32_imm16(i32) -> i32 { block0(v0: 
i32): @@ -90,8 +98,9 @@ block0(v0: i32): return v2 } -; check: ahi %r2, 1 -; nextln: br %r14 +; block0: +; ahi %r2, 1 +; br %r14 function %iadd_i32_imm(i32) -> i32 { block0(v0: i32): @@ -100,8 +109,9 @@ block0(v0: i32): return v2 } -; check: afi %r2, 32768 -; nextln: br %r14 +; block0: +; afi %r2, 32768 +; br %r14 function %iadd_i32_mem(i32, i64) -> i32 { block0(v0: i32, v1: i64): @@ -110,8 +120,9 @@ block0(v0: i32, v1: i64): return v3 } -; check: a %r2, 0(%r3) -; nextln: br %r14 +; block0: +; a %r2, 0(%r3) +; br %r14 function %iadd_i32_memoff(i32, i64) -> i32 { block0(v0: i32, v1: i64): @@ -120,8 +131,9 @@ block0(v0: i32, v1: i64): return v3 } -; check: ay %r2, 4096(%r3) -; nextln: br %r14 +; block0: +; ay %r2, 4096(%r3) +; br %r14 function %iadd_i32_mem_ext16(i32, i64) -> i32 { block0(v0: i32, v1: i64): @@ -130,8 +142,9 @@ block0(v0: i32, v1: i64): return v3 } -; check: ah %r2, 0(%r3) -; nextln: br %r14 +; block0: +; ah %r2, 0(%r3) +; br %r14 function %iadd_i32_memoff_ext16(i32, i64) -> i32 { block0(v0: i32, v1: i64): @@ -140,8 +153,9 @@ block0(v0: i32, v1: i64): return v3 } -; check: ahy %r2, 4096(%r3) -; nextln: br %r14 +; block0: +; ahy %r2, 4096(%r3) +; br %r14 function %iadd_i16(i16, i16) -> i16 { block0(v0: i16, v1: i16): @@ -149,8 +163,9 @@ block0(v0: i16, v1: i16): return v2 } -; check: ar %r2, %r3 -; nextln: br %r14 +; block0: +; ar %r2, %r3 +; br %r14 function %iadd_i16_imm(i16) -> i16 { block0(v0: i16): @@ -159,8 +174,9 @@ block0(v0: i16): return v2 } -; check: ahi %r2, 1 -; nextln: br %r14 +; block0: +; ahi %r2, 1 +; br %r14 function %iadd_i16_mem(i16, i64) -> i16 { block0(v0: i16, v1: i64): @@ -169,8 +185,9 @@ block0(v0: i16, v1: i64): return v3 } -; check: ah %r2, 0(%r3) -; nextln: br %r14 +; block0: +; ah %r2, 0(%r3) +; br %r14 function %iadd_i8(i8, i8) -> i8 { block0(v0: i8, v1: i8): @@ -178,8 +195,9 @@ block0(v0: i8, v1: i8): return v2 } -; check: ar %r2, %r3 -; nextln: br %r14 +; block0: +; ar %r2, %r3 +; br %r14 function %iadd_i8_imm(i8) -> i8 { block0(v0: i8): @@ -188,8 +206,9 @@ block0(v0: i8): return v2 } -; check: ahi %r2, 1 -; nextln: br %r14 +; block0: +; ahi %r2, 1 +; br %r14 function %iadd_i8_mem(i8, i64) -> i8 { block0(v0: i8, v1: i64): @@ -198,13 +217,10 @@ block0(v0: i8, v1: i64): return v3 } -; check: llc %r3, 0(%r3) -; nextln: ar %r2, %r3 -; nextln: br %r14 - -;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; -;; IADD_IFCOUT -;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +; block0: +; llc %r4, 0(%r3) +; ar %r2, %r4 +; br %r14 function %iadd_i64(i64, i64) -> i64 { block0(v0: i64, v1: i64): @@ -212,8 +228,9 @@ block0(v0: i64, v1: i64): return v2 } -; check: algr %r2, %r3 -; nextln: br %r14 +; block0: +; algr %r2, %r3 +; br %r14 function %iadd_i64_ext32(i64, i32) -> i64 { block0(v0: i64, v1: i32): @@ -222,8 +239,9 @@ block0(v0: i64, v1: i32): return v3 } -; check: algfr %r2, %r3 -; nextln: br %r14 +; block0: +; algfr %r2, %r3 +; br %r14 function %iadd_i64_imm32(i64) -> i64 { block0(v0: i64): @@ -232,8 +250,9 @@ block0(v0: i64): return v2 } -; check: algfi %r2, 32768 -; nextln: br %r14 +; block0: +; algfi %r2, 32768 +; br %r14 function %iadd_i64_mem(i64, i64) -> i64 { block0(v0: i64, v1: i64): @@ -242,8 +261,9 @@ block0(v0: i64, v1: i64): return v3 } -; check: alg %r2, 0(%r3) -; nextln: br %r14 +; block0: +; alg %r2, 0(%r3) +; br %r14 function %iadd_i64_mem_ext32(i64, i64) -> i64 { block0(v0: i64, v1: i64): @@ -252,8 +272,9 @@ block0(v0: i64, v1: i64): return v3 } -; check: algf %r2, 0(%r3) -; nextln: br %r14 +; block0: +; algf %r2, 0(%r3) +; br %r14 function 
%iadd_i32(i32, i32) -> i32 { block0(v0: i32, v1: i32): @@ -261,8 +282,9 @@ block0(v0: i32, v1: i32): return v2 } -; check: alr %r2, %r3 -; nextln: br %r14 +; block0: +; alr %r2, %r3 +; br %r14 function %iadd_i32_imm(i32) -> i32 { block0(v0: i32): @@ -271,8 +293,9 @@ block0(v0: i32): return v2 } -; check: alfi %r2, 32768 -; nextln: br %r14 +; block0: +; alfi %r2, 32768 +; br %r14 function %iadd_i32_mem(i32, i64) -> i32 { block0(v0: i32, v1: i64): @@ -281,8 +304,9 @@ block0(v0: i32, v1: i64): return v3 } -; check: al %r2, 0(%r3) -; nextln: br %r14 +; block0: +; al %r2, 0(%r3) +; br %r14 function %iadd_i32_memoff(i32, i64) -> i32 { block0(v0: i32, v1: i64): @@ -291,12 +315,9 @@ block0(v0: i32, v1: i64): return v3 } -; check: aly %r2, 4096(%r3) -; nextln: br %r14 - -;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; -;; ISUB -;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +; block0: +; aly %r2, 4096(%r3) +; br %r14 function %isub_i64(i64, i64) -> i64 { block0(v0: i64, v1: i64): @@ -304,8 +325,9 @@ block0(v0: i64, v1: i64): return v2 } -; check: sgr %r2, %r3 -; nextln: br %r14 +; block0: +; sgr %r2, %r3 +; br %r14 function %isub_i64_ext32(i64, i32) -> i64 { block0(v0: i64, v1: i32): @@ -314,8 +336,9 @@ block0(v0: i64, v1: i32): return v3 } -; check: sgfr %r2, %r3 -; nextln: br %r14 +; block0: +; sgfr %r2, %r3 +; br %r14 function %isub_i64_imm16(i64) -> i64 { block0(v0: i64): @@ -324,8 +347,9 @@ block0(v0: i64): return v2 } -; check: aghi %r2, -1 -; nextln: br %r14 +; block0: +; aghi %r2, -1 +; br %r14 function %isub_i64_imm32(i64) -> i64 { block0(v0: i64): @@ -334,8 +358,9 @@ block0(v0: i64): return v2 } -; check: agfi %r2, -32769 -; nextln: br %r14 +; block0: +; agfi %r2, -32769 +; br %r14 function %isub_i64_mem(i64, i64) -> i64 { block0(v0: i64, v1: i64): @@ -344,8 +369,9 @@ block0(v0: i64, v1: i64): return v3 } -; check: sg %r2, 0(%r3) -; nextln: br %r14 +; block0: +; sg %r2, 0(%r3) +; br %r14 function %isub_i64_mem_ext16(i64, i64) -> i64 { block0(v0: i64, v1: i64): @@ -354,8 +380,9 @@ block0(v0: i64, v1: i64): return v3 } -; check: sgh %r2, 0(%r3) -; nextln: br %r14 +; block0: +; sgh %r2, 0(%r3) +; br %r14 function %isub_i64_mem_ext32(i64, i64) -> i64 { block0(v0: i64, v1: i64): @@ -364,8 +391,9 @@ block0(v0: i64, v1: i64): return v3 } -; check: sgf %r2, 0(%r3) -; nextln: br %r14 +; block0: +; sgf %r2, 0(%r3) +; br %r14 function %isub_i32(i32, i32) -> i32 { block0(v0: i32, v1: i32): @@ -373,8 +401,9 @@ block0(v0: i32, v1: i32): return v2 } -; check: sr %r2, %r3 -; nextln: br %r14 +; block0: +; sr %r2, %r3 +; br %r14 function %isub_i32_imm16(i32) -> i32 { block0(v0: i32): @@ -383,8 +412,9 @@ block0(v0: i32): return v2 } -; check: ahi %r2, -1 -; nextln: br %r14 +; block0: +; ahi %r2, -1 +; br %r14 function %isub_i32_imm(i32) -> i32 { block0(v0: i32): @@ -393,8 +423,9 @@ block0(v0: i32): return v2 } -; check: afi %r2, -32769 -; nextln: br %r14 +; block0: +; afi %r2, -32769 +; br %r14 function %isub_i32_mem(i32, i64) -> i32 { block0(v0: i32, v1: i64): @@ -403,8 +434,9 @@ block0(v0: i32, v1: i64): return v3 } -; check: s %r2, 0(%r3) -; nextln: br %r14 +; block0: +; s %r2, 0(%r3) +; br %r14 function %isub_i32_memoff(i32, i64) -> i32 { block0(v0: i32, v1: i64): @@ -413,8 +445,9 @@ block0(v0: i32, v1: i64): return v3 } -; check: sy %r2, 4096(%r3) -; nextln: br %r14 +; block0: +; sy %r2, 4096(%r3) +; br %r14 function %isub_i32_mem_ext16(i32, i64) -> i32 { block0(v0: i32, v1: i64): @@ -423,8 +456,9 @@ block0(v0: i32, v1: i64): return v3 } -; check: sh %r2, 0(%r3) -; nextln: br %r14 +; block0: +; sh %r2, 0(%r3) 
+; br %r14 function %isub_i32_memoff_ext16(i32, i64) -> i32 { block0(v0: i32, v1: i64): @@ -433,8 +467,9 @@ block0(v0: i32, v1: i64): return v3 } -; check: shy %r2, 4096(%r3) -; nextln: br %r14 +; block0: +; shy %r2, 4096(%r3) +; br %r14 function %isub_i16(i16, i16) -> i16 { block0(v0: i16, v1: i16): @@ -442,8 +477,9 @@ block0(v0: i16, v1: i16): return v2 } -; check: sr %r2, %r3 -; nextln: br %r14 +; block0: +; sr %r2, %r3 +; br %r14 function %isub_i16_imm(i16) -> i16 { block0(v0: i16): @@ -452,8 +488,9 @@ block0(v0: i16): return v2 } -; check: ahi %r2, -1 -; nextln: br %r14 +; block0: +; ahi %r2, -1 +; br %r14 function %isub_i16_mem(i16, i64) -> i16 { block0(v0: i16, v1: i64): @@ -462,8 +499,9 @@ block0(v0: i16, v1: i64): return v3 } -; check: sh %r2, 0(%r3) -; nextln: br %r14 +; block0: +; sh %r2, 0(%r3) +; br %r14 function %isub_i8(i8, i8) -> i8 { block0(v0: i8, v1: i8): @@ -471,8 +509,9 @@ block0(v0: i8, v1: i8): return v2 } -; check: sr %r2, %r3 -; nextln: br %r14 +; block0: +; sr %r2, %r3 +; br %r14 function %isub_i8_imm(i8) -> i8 { block0(v0: i8): @@ -481,8 +520,9 @@ block0(v0: i8): return v2 } -; check: ahi %r2, -1 -; nextln: br %r14 +; block0: +; ahi %r2, -1 +; br %r14 function %isub_i8_mem(i8, i64) -> i8 { block0(v0: i8, v1: i64): @@ -491,13 +531,10 @@ block0(v0: i8, v1: i64): return v3 } -; check: llc %r3, 0(%r3) -; nextln: sr %r2, %r3 -; nextln: br %r14 - -;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; -;; IABS -;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +; block0: +; llc %r4, 0(%r3) +; sr %r2, %r4 +; br %r14 function %iabs_i64(i64) -> i64 { block0(v0: i64): @@ -505,8 +542,9 @@ block0(v0: i64): return v1 } -; check: lpgr %r2, %r2 -; nextln: br %r14 +; block0: +; lpgr %r2, %r2 +; br %r14 function %iabs_i64_ext32(i32) -> i64 { block0(v0: i32): @@ -515,8 +553,9 @@ block0(v0: i32): return v2 } -; check: lpgfr %r2, %r2 -; nextln: br %r14 +; block0: +; lpgfr %r2, %r2 +; br %r14 function %iabs_i32(i32) -> i32 { block0(v0: i32): @@ -524,8 +563,9 @@ block0(v0: i32): return v1 } -; check: lpr %r2, %r2 -; nextln: br %r14 +; block0: +; lpr %r2, %r2 +; br %r14 function %iabs_i16(i16) -> i16 { block0(v0: i16): @@ -533,9 +573,10 @@ block0(v0: i16): return v1 } -; check: lhr %r2, %r2 -; nextln: lpr %r2, %r2 -; nextln: br %r14 +; block0: +; lhr %r5, %r2 +; lpr %r2, %r5 +; br %r14 function %iabs_i8(i8) -> i8 { block0(v0: i8): @@ -543,15 +584,10 @@ block0(v0: i8): return v1 } -; check: lbr %r2, %r2 -; nextln: lpr %r2, %r2 -; nextln: br %r14 - -;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; -;; INEG -;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; - -; FIXME: neg-abs combination not yet supported +; block0: +; lbr %r5, %r2 +; lpr %r2, %r5 +; br %r14 function %ineg_i64(i64) -> i64 { block0(v0: i64): @@ -559,8 +595,9 @@ block0(v0: i64): return v1 } -; check: lcgr %r2, %r2 -; nextln: br %r14 +; block0: +; lcgr %r2, %r2 +; br %r14 function %ineg_i64_ext32(i32) -> i64 { block0(v0: i32): @@ -569,8 +606,9 @@ block0(v0: i32): return v2 } -; check: lcgfr %r2, %r2 -; nextln: br %r14 +; block0: +; lcgfr %r2, %r2 +; br %r14 function %ineg_i32(i32) -> i32 { block0(v0: i32): @@ -578,8 +616,9 @@ block0(v0: i32): return v1 } -; check: lcr %r2, %r2 -; nextln: br %r14 +; block0: +; lcr %r2, %r2 +; br %r14 function %ineg_i16(i16) -> i16 { block0(v0: i16): @@ -587,8 +626,9 @@ block0(v0: i16): return v1 } -; check: lcr %r2, %r2 -; nextln: br %r14 +; block0: +; lcr %r2, %r2 +; br %r14 function %ineg_i8(i8) -> i8 { block0(v0: i8): @@ -596,12 +636,9 @@ block0(v0: i8): return v1 } -; check: lcr %r2, %r2 -; nextln: br %r14 - 
-;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; -;; IMUL -;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +; block0: +; lcr %r2, %r2 +; br %r14 function %imul_i64(i64, i64) -> i64 { block0(v0: i64, v1: i64): @@ -609,8 +646,9 @@ block0(v0: i64, v1: i64): return v2 } -; check: msgr %r2, %r3 -; nextln: br %r14 +; block0: +; msgr %r2, %r3 +; br %r14 function %imul_i64_imm16(i64) -> i64 { block0(v0: i64): @@ -619,8 +657,9 @@ block0(v0: i64): return v2 } -; check: mghi %r2, 3 -; nextln: br %r14 +; block0: +; mghi %r2, 3 +; br %r14 function %imul_i64_imm32(i64) -> i64 { block0(v0: i64): @@ -629,8 +668,9 @@ block0(v0: i64): return v2 } -; check: msgfi %r2, 32769 -; nextln: br %r14 +; block0: +; msgfi %r2, 32769 +; br %r14 function %imul_i64_mem(i64, i64) -> i64 { block0(v0: i64, v1: i64): @@ -639,8 +679,9 @@ block0(v0: i64, v1: i64): return v3 } -; check: msg %r2, 0(%r3) -; nextln: br %r14 +; block0: +; msg %r2, 0(%r3) +; br %r14 function %imul_i64_mem_ext16(i64, i64) -> i64 { block0(v0: i64, v1: i64): @@ -649,8 +690,9 @@ block0(v0: i64, v1: i64): return v3 } -; check: mgh %r2, 0(%r3) -; nextln: br %r14 +; block0: +; mgh %r2, 0(%r3) +; br %r14 function %imul_i64_mem_ext32(i64, i64) -> i64 { block0(v0: i64, v1: i64): @@ -659,8 +701,9 @@ block0(v0: i64, v1: i64): return v3 } -; check: msgf %r2, 0(%r3) -; nextln: br %r14 +; block0: +; msgf %r2, 0(%r3) +; br %r14 function %imul_i32(i32, i32) -> i32 { block0(v0: i32, v1: i32): @@ -668,8 +711,9 @@ block0(v0: i32, v1: i32): return v2 } -; check: msr %r2, %r3 -; nextln: br %r14 +; block0: +; msr %r2, %r3 +; br %r14 function %imul_i32_imm16(i32) -> i32 { block0(v0: i32): @@ -678,8 +722,9 @@ block0(v0: i32): return v2 } -; check: mhi %r2, 3 -; nextln: br %r14 +; block0: +; mhi %r2, 3 +; br %r14 function %imul_i32_imm32(i32) -> i32 { block0(v0: i32): @@ -688,8 +733,9 @@ block0(v0: i32): return v2 } -; check: msfi %r2, 32769 -; nextln: br %r14 +; block0: +; msfi %r2, 32769 +; br %r14 function %imul_i32_mem(i32, i64) -> i32 { block0(v0: i32, v1: i64): @@ -698,8 +744,9 @@ block0(v0: i32, v1: i64): return v3 } -; check: ms %r2, 0(%r3) -; nextln: br %r14 +; block0: +; ms %r2, 0(%r3) +; br %r14 function %imul_i32_memoff(i32, i64) -> i32 { block0(v0: i32, v1: i64): @@ -708,8 +755,9 @@ block0(v0: i32, v1: i64): return v3 } -; check: msy %r2, 4096(%r3) -; nextln: br %r14 +; block0: +; msy %r2, 4096(%r3) +; br %r14 function %imul_i32_mem_ext16(i32, i64) -> i32 { block0(v0: i32, v1: i64): @@ -718,8 +766,9 @@ block0(v0: i32, v1: i64): return v3 } -; check: mh %r2, 0(%r3) -; nextln: br %r14 +; block0: +; mh %r2, 0(%r3) +; br %r14 function %imul_i32_memoff_ext16(i32, i64) -> i32 { block0(v0: i32, v1: i64): @@ -728,8 +777,9 @@ block0(v0: i32, v1: i64): return v3 } -; check: mhy %r2, 4096(%r3) -; nextln: br %r14 +; block0: +; mhy %r2, 4096(%r3) +; br %r14 function %imul_i16(i16, i16) -> i16 { block0(v0: i16, v1: i16): @@ -737,8 +787,9 @@ block0(v0: i16, v1: i16): return v2 } -; check: msr %r2, %r3 -; nextln: br %r14 +; block0: +; msr %r2, %r3 +; br %r14 function %imul_i16_imm(i16) -> i16 { block0(v0: i16): @@ -747,8 +798,9 @@ block0(v0: i16): return v2 } -; check: mhi %r2, 3 -; nextln: br %r14 +; block0: +; mhi %r2, 3 +; br %r14 function %imul_i16_mem(i16, i64) -> i16 { block0(v0: i16, v1: i64): @@ -757,8 +809,9 @@ block0(v0: i16, v1: i64): return v3 } -; check: mh %r2, 0(%r3) -; nextln: br %r14 +; block0: +; mh %r2, 0(%r3) +; br %r14 function %imul_i8(i8, i8) -> i8 { block0(v0: i8, v1: i8): @@ -766,8 +819,9 @@ block0(v0: i8, v1: i8): return v2 } -; check: msr %r2, %r3 -; nextln: 
br %r14 +; block0: +; msr %r2, %r3 +; br %r14 function %imul_i8_imm(i8) -> i8 { block0(v0: i8): @@ -776,8 +830,9 @@ block0(v0: i8): return v2 } -; check: mhi %r2, 3 -; nextln: br %r14 +; block0: +; mhi %r2, 3 +; br %r14 function %imul_i8_mem(i8, i64) -> i8 { block0(v0: i8, v1: i64): @@ -786,13 +841,10 @@ block0(v0: i8, v1: i64): return v3 } -; check: llc %r3, 0(%r3) -; nextln: msr %r2, %r3 -; nextln: br %r14 - -;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; -;; UMULHI -;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +; block0: +; llc %r4, 0(%r3) +; msr %r2, %r4 +; br %r14 function %umulhi_i64(i64, i64) -> i64 { block0(v0: i64, v1: i64): @@ -800,10 +852,11 @@ block0(v0: i64, v1: i64): return v2 } -; check: lgr %r1, %r3 -; nextln: mlgr %r0, %r2 -; nextln: lgr %r2, %r0 -; nextln: br %r14 +; block0: +; lgr %r1, %r3 +; mlgr %r0, %r2 +; lgr %r2, %r0 +; br %r14 function %umulhi_i32(i32, i32) -> i32 { block0(v0: i32, v1: i32): @@ -811,11 +864,14 @@ block0(v0: i32, v1: i32): return v2 } -; check: llgfr %r2, %r2 -; nextln: llgfr %r3, %r3 -; nextln: msgr %r2, %r3 -; nextln: srlg %r2, %r2, 32 -; nextln: br %r14 +; block0: +; lgr %r4, %r3 +; llgfr %r3, %r2 +; lgr %r2, %r4 +; llgfr %r5, %r2 +; msgr %r3, %r5 +; srlg %r2, %r3, 32 +; br %r14 function %umulhi_i16(i16, i16) -> i16 { block0(v0: i16, v1: i16): @@ -823,11 +879,14 @@ block0(v0: i16, v1: i16): return v2 } -; check: llhr %r2, %r2 -; nextln: llhr %r3, %r3 -; nextln: msr %r2, %r3 -; nextln: srlk %r2, %r2, 16 -; nextln: br %r14 +; block0: +; lgr %r4, %r3 +; llhr %r3, %r2 +; lgr %r2, %r4 +; llhr %r5, %r2 +; msr %r3, %r5 +; srlk %r2, %r3, 16 +; br %r14 function %umulhi_i8(i8, i8) -> i8 { block0(v0: i8, v1: i8): @@ -835,15 +894,14 @@ block0(v0: i8, v1: i8): return v2 } -; check: llcr %r2, %r2 -; nextln: llcr %r3, %r3 -; nextln: msr %r2, %r3 -; nextln: srlk %r2, %r2, 8 -; nextln: br %r14 - -;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; -;; SMULHI -;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +; block0: +; lgr %r4, %r3 +; llcr %r3, %r2 +; lgr %r2, %r4 +; llcr %r5, %r2 +; msr %r3, %r5 +; srlk %r2, %r3, 8 +; br %r14 function %smulhi_i64(i64, i64) -> i64 { block0(v0: i64, v1: i64): @@ -851,9 +909,10 @@ block0(v0: i64, v1: i64): return v2 } -; check: mgrk %r0, %r2, %r3 -; nextln: lgr %r2, %r0 -; nextln: br %r14 +; block0: +; mgrk %r0, %r2, %r3 +; lgr %r2, %r0 +; br %r14 function %smulhi_i32(i32, i32) -> i32 { block0(v0: i32, v1: i32): @@ -861,11 +920,14 @@ block0(v0: i32, v1: i32): return v2 } -; check: lgfr %r2, %r2 -; nextln: lgfr %r3, %r3 -; nextln: msgr %r2, %r3 -; nextln: srag %r2, %r2, 32 -; nextln: br %r14 +; block0: +; lgr %r4, %r3 +; lgfr %r3, %r2 +; lgr %r2, %r4 +; lgfr %r5, %r2 +; msgr %r3, %r5 +; srag %r2, %r3, 32 +; br %r14 function %smulhi_i16(i16, i16) -> i16 { block0(v0: i16, v1: i16): @@ -873,11 +935,14 @@ block0(v0: i16, v1: i16): return v2 } -; check: lhr %r2, %r2 -; nextln: lhr %r3, %r3 -; nextln: msr %r2, %r3 -; nextln: srak %r2, %r2, 16 -; nextln: br %r14 +; block0: +; lgr %r4, %r3 +; lhr %r3, %r2 +; lgr %r2, %r4 +; lhr %r5, %r2 +; msr %r3, %r5 +; srak %r2, %r3, 16 +; br %r14 function %smulhi_i8(i8, i8) -> i8 { block0(v0: i8, v1: i8): @@ -885,15 +950,14 @@ block0(v0: i8, v1: i8): return v2 } -; check: lbr %r2, %r2 -; nextln: lbr %r3, %r3 -; nextln: msr %r2, %r3 -; nextln: srak %r2, %r2, 8 -; nextln: br %r14 - -;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; -;; SDIV -;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +; block0: +; lgr %r4, %r3 +; lbr %r3, %r2 +; lgr %r2, %r4 +; lbr %r5, %r2 +; msr %r3, %r5 +; srak %r2, %r3, 8 +; br %r14 function 
%sdiv_i64(i64, i64) -> i64 { block0(v0: i64, v1: i64): @@ -901,15 +965,16 @@ block0(v0: i64, v1: i64): return v2 } -; check: lgr %r1, %r2 -; nextln: llihf %r2, 2147483647 -; nextln: iilf %r2, 4294967295 -; nextln: xgr %r2, %r1 -; nextln: ngr %r2, %r3 -; nextln: cgite %r2, -1 -; nextln: dsgr %r0, %r3 -; nextln: lgr %r2, %r1 -; nextln: br %r14 +; block0: +; lgr %r1, %r2 +; llihf %r4, 2147483647 +; iilf %r4, 4294967295 +; xgr %r4, %r1 +; ngrk %r5, %r4, %r3 +; cgite %r5, -1 +; dsgr %r0, %r3 +; lgr %r2, %r1 +; br %r14 function %sdiv_i64_imm(i64) -> i64 { block0(v0: i64): @@ -918,11 +983,12 @@ block0(v0: i64): return v2 } -; check: lgr %r1, %r2 -; nextln: lghi %r2, 2 -; nextln: dsgr %r0, %r2 -; nextln: lgr %r2, %r1 -; nextln: br %r14 +; block0: +; lgr %r1, %r2 +; lghi %r2, 2 +; dsgr %r0, %r2 +; lgr %r2, %r1 +; br %r14 function %sdiv_i32(i32, i32) -> i32 { block0(v0: i32, v1: i32): @@ -930,14 +996,15 @@ block0(v0: i32, v1: i32): return v2 } -; check: lgfr %r1, %r2 -; nextln: iilf %r2, 2147483647 -; nextln: xr %r2, %r1 -; nextln: nr %r2, %r3 -; nextln: cite %r2, -1 -; nextln: dsgfr %r0, %r3 -; nextln: lr %r2, %r1 -; nextln: br %r14 +; block0: +; lgfr %r1, %r2 +; iilf %r4, 2147483647 +; xrk %r2, %r4, %r1 +; nrk %r4, %r2, %r3 +; cite %r4, -1 +; dsgfr %r0, %r3 +; lgr %r2, %r1 +; br %r14 function %sdiv_i32_imm(i32) -> i32 { block0(v0: i32): @@ -946,11 +1013,12 @@ block0(v0: i32): return v2 } -; check: lgfr %r1, %r2 -; nextln: lhi %r2, 2 -; nextln: dsgfr %r0, %r2 -; nextln: lr %r2, %r1 -; nextln: br %r14 +; block0: +; lgfr %r1, %r2 +; lhi %r2, 2 +; dsgfr %r0, %r2 +; lgr %r2, %r1 +; br %r14 function %sdiv_i16(i16, i16) -> i16 { block0(v0: i16, v1: i16): @@ -958,15 +1026,16 @@ block0(v0: i16, v1: i16): return v2 } -; check: lghr %r1, %r2 -; nextln: lhr %r2, %r3 -; nextln: lhi %r3, 32767 -; nextln: xr %r3, %r1 -; nextln: nr %r3, %r2 -; nextln: cite %r3, -1 -; nextln: dsgfr %r0, %r2 -; nextln: lr %r2, %r1 -; nextln: br %r14 +; block0: +; lghr %r1, %r2 +; lhr %r4, %r3 +; lhi %r2, 32767 +; xrk %r5, %r2, %r1 +; nrk %r2, %r5, %r4 +; cite %r2, -1 +; dsgfr %r0, %r4 +; lgr %r2, %r1 +; br %r14 function %sdiv_i16_imm(i16) -> i16 { block0(v0: i16): @@ -975,11 +1044,12 @@ block0(v0: i16): return v2 } -; check: lghr %r1, %r2 -; nextln: lhi %r2, 2 -; nextln: dsgfr %r0, %r2 -; nextln: lr %r2, %r1 -; nextln: br %r14 +; block0: +; lghr %r1, %r2 +; lhi %r2, 2 +; dsgfr %r0, %r2 +; lgr %r2, %r1 +; br %r14 function %sdiv_i8(i8, i8) -> i8 { block0(v0: i8, v1: i8): @@ -987,15 +1057,16 @@ block0(v0: i8, v1: i8): return v2 } -; check: lgbr %r1, %r2 -; nextln: lbr %r2, %r3 -; nextln: lhi %r3, 127 -; nextln: xr %r3, %r1 -; nextln: nr %r3, %r2 -; nextln: cite %r3, -1 -; nextln: dsgfr %r0, %r2 -; nextln: lr %r2, %r1 -; nextln: br %r14 +; block0: +; lgbr %r1, %r2 +; lbr %r4, %r3 +; lhi %r2, 127 +; xrk %r5, %r2, %r1 +; nrk %r2, %r5, %r4 +; cite %r2, -1 +; dsgfr %r0, %r4 +; lgr %r2, %r1 +; br %r14 function %sdiv_i8_imm(i8) -> i8 { block0(v0: i8): @@ -1004,15 +1075,12 @@ block0(v0: i8): return v2 } -; check: lgbr %r1, %r2 -; nextln: lhi %r2, 2 -; nextln: dsgfr %r0, %r2 -; nextln: lr %r2, %r1 -; nextln: br %r14 - -;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; -;; UDIV -;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +; block0: +; lgbr %r1, %r2 +; lhi %r2, 2 +; dsgfr %r0, %r2 +; lgr %r2, %r1 +; br %r14 function %udiv_i64(i64, i64) -> i64 { block0(v0: i64, v1: i64): @@ -1020,11 +1088,12 @@ block0(v0: i64, v1: i64): return v2 } -; check: lghi %r0, 0 -; nextln: lgr %r1, %r2 -; nextln: dlgr %r0, %r3 -; nextln: lgr %r2, %r1 -; nextln: br %r14 +; 
block0: +; lghi %r0, 0 +; lgr %r1, %r2 +; dlgr %r0, %r3 +; lgr %r2, %r1 +; br %r14 function %udiv_i64_imm(i64) -> i64 { block0(v0: i64): @@ -1033,12 +1102,13 @@ block0(v0: i64): return v2 } -; check: lghi %r0, 0 -; nextln: lgr %r1, %r2 -; nextln: lghi %r2, 2 -; nextln: dlgr %r0, %r2 -; nextln: lgr %r2, %r1 -; nextln: br %r14 +; block0: +; lghi %r0, 0 +; lgr %r1, %r2 +; lghi %r3, 2 +; dlgr %r0, %r3 +; lgr %r2, %r1 +; br %r14 function %udiv_i32(i32, i32) -> i32 { block0(v0: i32, v1: i32): @@ -1046,11 +1116,12 @@ block0(v0: i32, v1: i32): return v2 } -; check: lhi %r0, 0 -; nextln: lr %r1, %r2 -; nextln: dlr %r0, %r3 -; nextln: lr %r2, %r1 -; nextln: br %r14 +; block0: +; lhi %r0, 0 +; lgr %r1, %r2 +; dlr %r0, %r3 +; lgr %r2, %r1 +; br %r14 function %udiv_i32_imm(i32) -> i32 { block0(v0: i32): @@ -1059,12 +1130,13 @@ block0(v0: i32): return v2 } -; check: lhi %r0, 0 -; nextln: lr %r1, %r2 -; nextln: lhi %r2, 2 -; nextln: dlr %r0, %r2 -; nextln: lr %r2, %r1 -; nextln: br %r14 +; block0: +; lhi %r0, 0 +; lgr %r1, %r2 +; lhi %r3, 2 +; dlr %r0, %r3 +; lgr %r2, %r1 +; br %r14 function %udiv_i16(i16, i16) -> i16 { block0(v0: i16, v1: i16): @@ -1072,12 +1144,13 @@ block0(v0: i16, v1: i16): return v2 } -; check: lhi %r0, 0 -; nextln: llhr %r1, %r2 -; nextln: llhr %r2, %r3 -; nextln: dlr %r0, %r2 -; nextln: lr %r2, %r1 -; nextln: br %r14 +; block0: +; lhi %r0, 0 +; llhr %r1, %r2 +; llhr %r5, %r3 +; dlr %r0, %r5 +; lgr %r2, %r1 +; br %r14 function %udiv_i16_imm(i16) -> i16 { block0(v0: i16): @@ -1086,12 +1159,13 @@ block0(v0: i16): return v2 } -; check: lhi %r0, 0 -; nextln: llhr %r1, %r2 -; nextln: lhi %r2, 2 -; nextln: dlr %r0, %r2 -; nextln: lr %r2, %r1 -; nextln: br %r14 +; block0: +; lhi %r0, 0 +; llhr %r1, %r2 +; lhi %r3, 2 +; dlr %r0, %r3 +; lgr %r2, %r1 +; br %r14 function %udiv_i8(i8, i8) -> i8 { block0(v0: i8, v1: i8): @@ -1099,12 +1173,13 @@ block0(v0: i8, v1: i8): return v2 } -; check: lhi %r0, 0 -; nextln: llcr %r1, %r2 -; nextln: llcr %r2, %r3 -; nextln: dlr %r0, %r2 -; nextln: lr %r2, %r1 -; nextln: br %r14 +; block0: +; lhi %r0, 0 +; llcr %r1, %r2 +; llcr %r5, %r3 +; dlr %r0, %r5 +; lgr %r2, %r1 +; br %r14 function %udiv_i8_imm(i8) -> i8 { block0(v0: i8): @@ -1113,16 +1188,13 @@ block0(v0: i8): return v2 } -; check: lhi %r0, 0 -; nextln: llcr %r1, %r2 -; nextln: lhi %r2, 2 -; nextln: dlr %r0, %r2 -; nextln: lr %r2, %r1 -; nextln: br %r14 - -;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; -;; SREM -;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +; block0: +; lhi %r0, 0 +; llcr %r1, %r2 +; lhi %r3, 2 +; dlr %r0, %r3 +; lgr %r2, %r1 +; br %r14 function %srem_i64(i64, i64) -> i64 { block0(v0: i64, v1: i64): @@ -1130,12 +1202,13 @@ block0(v0: i64, v1: i64): return v2 } -; check: lgr %r1, %r2 -; nextln: cghi %r3, -1 -; nextln: locghie %r1, 0 -; nextln: dsgr %r0, %r3 -; nextln: lgr %r2, %r0 -; nextln: br %r14 +; block0: +; lgr %r1, %r2 +; cghi %r3, -1 +; locghie %r1, 0 +; dsgr %r0, %r3 +; lgr %r2, %r0 +; br %r14 function %srem_i32(i32, i32) -> i32 { block0(v0: i32, v1: i32): @@ -1143,10 +1216,11 @@ block0(v0: i32, v1: i32): return v2 } -; check: lgfr %r1, %r2 -; nextln: dsgfr %r0, %r3 -; nextln: lr %r2, %r0 -; nextln: br %r14 +; block0: +; lgfr %r1, %r2 +; dsgfr %r0, %r3 +; lgr %r2, %r0 +; br %r14 function %srem_i16(i16, i16) -> i16 { block0(v0: i16, v1: i16): @@ -1154,11 +1228,12 @@ block0(v0: i16, v1: i16): return v2 } -; check: lghr %r1, %r2 -; nextln: lhr %r2, %r3 -; nextln: dsgfr %r0, %r2 -; nextln: lr %r2, %r0 -; nextln: br %r14 +; block0: +; lghr %r1, %r2 +; lhr %r4, %r3 +; dsgfr %r0, %r4 +; 
lgr %r2, %r0 +; br %r14 function %srem_i8(i8, i8) -> i8 { block0(v0: i8, v1: i8): @@ -1166,15 +1241,12 @@ block0(v0: i8, v1: i8): return v2 } -; check: lgbr %r1, %r2 -; nextln: lbr %r2, %r3 -; nextln: dsgfr %r0, %r2 -; nextln: lr %r2, %r0 -; nextln: br %r14 - -;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; -;; UREM -;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +; block0: +; lgbr %r1, %r2 +; lbr %r4, %r3 +; dsgfr %r0, %r4 +; lgr %r2, %r0 +; br %r14 function %urem_i64(i64, i64) -> i64 { block0(v0: i64, v1: i64): @@ -1182,11 +1254,12 @@ block0(v0: i64, v1: i64): return v2 } -; check: lghi %r0, 0 -; nextln: lgr %r1, %r2 -; nextln: dlgr %r0, %r3 -; nextln: lgr %r2, %r0 -; nextln: br %r14 +; block0: +; lghi %r0, 0 +; lgr %r1, %r2 +; dlgr %r0, %r3 +; lgr %r2, %r0 +; br %r14 function %urem_i32(i32, i32) -> i32 { block0(v0: i32, v1: i32): @@ -1194,11 +1267,12 @@ block0(v0: i32, v1: i32): return v2 } -; check: lhi %r0, 0 -; nextln: lr %r1, %r2 -; nextln: dlr %r0, %r3 -; nextln: lr %r2, %r0 -; nextln: br %r14 +; block0: +; lhi %r0, 0 +; lgr %r1, %r2 +; dlr %r0, %r3 +; lgr %r2, %r0 +; br %r14 function %urem_i16(i16, i16) -> i16 { block0(v0: i16, v1: i16): @@ -1206,12 +1280,13 @@ block0(v0: i16, v1: i16): return v2 } -; check: lhi %r0, 0 -; check: llhr %r1, %r2 -; nextln: llhr %r2, %r3 -; nextln: dlr %r0, %r2 -; nextln: lr %r2, %r0 -; nextln: br %r14 +; block0: +; lhi %r0, 0 +; llhr %r1, %r2 +; llhr %r5, %r3 +; dlr %r0, %r5 +; lgr %r2, %r0 +; br %r14 function %urem_i8(i8, i8) -> i8 { block0(v0: i8, v1: i8): @@ -1219,10 +1294,11 @@ block0(v0: i8, v1: i8): return v2 } -; check: lhi %r0, 0 -; check: llcr %r1, %r2 -; nextln: llcr %r2, %r3 -; nextln: dlr %r0, %r2 -; nextln: lr %r2, %r0 -; nextln: br %r14 +; block0: +; lhi %r0, 0 +; llcr %r1, %r2 +; llcr %r5, %r3 +; dlr %r0, %r5 +; lgr %r2, %r0 +; br %r14 diff --git a/cranelift/filetests/filetests/isa/s390x/atomic_cas-little.clif b/cranelift/filetests/filetests/isa/s390x/atomic_cas-little.clif index 53eed240c6..624f0d1849 100644 --- a/cranelift/filetests/filetests/isa/s390x/atomic_cas-little.clif +++ b/cranelift/filetests/filetests/isa/s390x/atomic_cas-little.clif @@ -1,4 +1,4 @@ -test compile +test compile precise-output target s390x ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; @@ -11,11 +11,12 @@ block0(v0: i64, v1: i64, v2: i64): return v3 } -; check: lrvgr %r2, %r2 -; nextln: lrvgr %r3, %r3 -; nextln: csg %r2, %r3, 0(%r4) -; nextln: lrvgr %r2, %r2 -; nextln: br %r14 +; block0: +; lrvgr %r5, %r2 +; lrvgr %r3, %r3 +; csg %r5, %r3, 0(%r4) +; lrvgr %r2, %r5 +; br %r14 function %atomic_cas_i32(i32, i32, i64) -> i32 { block0(v0: i32, v1: i32, v2: i64): @@ -23,11 +24,12 @@ block0(v0: i32, v1: i32, v2: i64): return v3 } -; check: lrvr %r2, %r2 -; nextln: lrvr %r3, %r3 -; nextln: cs %r2, %r3, 0(%r4) -; nextln: lrvr %r2, %r2 -; nextln: br %r14 +; block0: +; lrvr %r5, %r2 +; lrvr %r3, %r3 +; cs %r5, %r3, 0(%r4) +; lrvr %r2, %r5 +; br %r14 function %atomic_cas_i16(i64, i16, i16, i64) -> i16 { block0(v0: i64, v1: i16, v2: i16, v3: i64): @@ -35,15 +37,20 @@ block0(v0: i64, v1: i16, v2: i16, v3: i64): return v4 } -; check: sllk %r2, %r5, 3 -; nextln: nill %r5, 65532 -; nextln: lrvr %r3, %r3 -; nextln: lrvr %r4, %r4 -; nextln: l %r0, 0(%r5) -; nextln: 0: rll %r1, %r0, 16(%r2) ; rxsbg %r1, %r3, 176, 64, 48 ; jglh 1f ; risbgn %r1, %r4, 48, 64, 48 ; rll %r1, %r1, 16(%r2) ; cs %r0, %r1, 0(%r5) ; jglh 0b ; 1: -; nextln: rll %r2, %r0, 0(%r2) -; nextln: lrvr %r2, %r2 -; nextln: br %r14 +; stmg %r6, %r15, 48(%r15) +; block0: +; lgr %r6, %r3 +; sllk %r3, %r5, 3 +; nill %r5, 65532 +; 
lgr %r2, %r6 +; lrvr %r2, %r2 +; lrvr %r4, %r4 +; l %r0, 0(%r5) +; 0: rll %r1, %r0, 16(%r3) ; rxsbg %r1, %r2, 176, 64, 48 ; jglh 1f ; risbgn %r1, %r4, 48, 64, 48 ; rll %r1, %r1, 16(%r3) ; cs %r0, %r1, 0(%r5) ; jglh 0b ; 1: +; rll %r2, %r0, 0(%r3) +; lrvr %r2, %r2 +; lmg %r6, %r15, 48(%r15) +; br %r14 function %atomic_cas_i8(i64, i8, i8, i64) -> i8 { block0(v0: i64, v1: i8, v2: i8, v3: i64): @@ -51,13 +58,14 @@ block0(v0: i64, v1: i8, v2: i8, v3: i64): return v4 } -; check: stmg %r14, %r15, 112(%r15) -; nextln: sllk %r2, %r5, 3 -; nextln: nill %r5, 65532 -; nextln: lcr %r14, %r2 -; nextln: l %r0, 0(%r5) -; nextln: 0: rll %r1, %r0, 0(%r2) ; rxsbg %r1, %r3, 160, 40, 24 ; jglh 1f ; risbgn %r1, %r4, 32, 40, 24 ; rll %r1, %r1, 0(%r14) ; cs %r0, %r1, 0(%r5) ; jglh 0b -; nextln: rll %r2, %r0, 8(%r2) -; nextln: lmg %r14, %r15, 112(%r15) -; nextln: br %r14 +; stmg %r12, %r15, 96(%r15) +; block0: +; sllk %r2, %r5, 3 +; nill %r5, 65532 +; lcr %r12, %r2 +; l %r0, 0(%r5) +; 0: rll %r1, %r0, 0(%r2) ; rxsbg %r1, %r3, 160, 40, 24 ; jglh 1f ; risbgn %r1, %r4, 32, 40, 24 ; rll %r1, %r1, 0(%r12) ; cs %r0, %r1, 0(%r5) ; jglh 0b ; 1: +; rll %r2, %r0, 8(%r2) +; lmg %r12, %r15, 96(%r15) +; br %r14 diff --git a/cranelift/filetests/filetests/isa/s390x/atomic_cas.clif b/cranelift/filetests/filetests/isa/s390x/atomic_cas.clif index 5d9e79786f..a7d94fc46c 100644 --- a/cranelift/filetests/filetests/isa/s390x/atomic_cas.clif +++ b/cranelift/filetests/filetests/isa/s390x/atomic_cas.clif @@ -1,4 +1,4 @@ -test compile +test compile precise-output target s390x ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; @@ -11,8 +11,9 @@ block0(v0: i64, v1: i64, v2: i64): return v3 } -; check: csg %r2, %r3, 0(%r4) -; nextln: br %r14 +; block0: +; csg %r2, %r3, 0(%r4) +; br %r14 function %atomic_cas_i32(i32, i32, i64) -> i32 { block0(v0: i32, v1: i32, v2: i64): @@ -20,8 +21,9 @@ block0(v0: i32, v1: i32, v2: i64): return v3 } -; check: cs %r2, %r3, 0(%r4) -; nextln: br %r14 +; block0: +; cs %r2, %r3, 0(%r4) +; br %r14 function %atomic_cas_i16(i64, i16, i16, i64) -> i16 { block0(v0: i64, v1: i16, v2: i16, v3: i64): @@ -29,12 +31,13 @@ block0(v0: i64, v1: i16, v2: i16, v3: i64): return v4 } -; check: sllk %r2, %r5, 3 -; nextln: nill %r5, 65532 -; nextln: l %r0, 0(%r5) -; nextln: 0: rll %r1, %r0, 0(%r2) ; rxsbg %r1, %r3, 160, 48, 16 ; jglh 1f ; risbgn %r1, %r4, 32, 48, 16 ; rll %r1, %r1, 0(%r2) ; cs %r0, %r1, 0(%r5) ; jglh 0b ; 1: -; nextln: rll %r2, %r0, 16(%r2) -; nextln: br %r14 +; block0: +; sllk %r2, %r5, 3 +; nill %r5, 65532 +; l %r0, 0(%r5) +; 0: rll %r1, %r0, 0(%r2) ; rxsbg %r1, %r3, 160, 48, 16 ; jglh 1f ; risbgn %r1, %r4, 32, 48, 16 ; rll %r1, %r1, 0(%r2) ; cs %r0, %r1, 0(%r5) ; jglh 0b ; 1: +; rll %r2, %r0, 16(%r2) +; br %r14 function %atomic_cas_i8(i64, i8, i8, i64) -> i8 { block0(v0: i64, v1: i8, v2: i8, v3: i64): @@ -42,13 +45,14 @@ block0(v0: i64, v1: i8, v2: i8, v3: i64): return v4 } -; check: stmg %r14, %r15, 112(%r15) -; nextln: sllk %r2, %r5, 3 -; nextln: nill %r5, 65532 -; nextln: lcr %r14, %r2 -; nextln: l %r0, 0(%r5) -; nextln: 0: rll %r1, %r0, 0(%r2) ; rxsbg %r1, %r3, 160, 40, 24 ; jglh 1f ; risbgn %r1, %r4, 32, 40, 24 ; rll %r1, %r1, 0(%r14) ; cs %r0, %r1, 0(%r5) ; jglh 0b -; nextln: rll %r2, %r0, 8(%r2) -; nextln: lmg %r14, %r15, 112(%r15) -; nextln: br %r14 +; stmg %r12, %r15, 96(%r15) +; block0: +; sllk %r2, %r5, 3 +; nill %r5, 65532 +; lcr %r12, %r2 +; l %r0, 0(%r5) +; 0: rll %r1, %r0, 0(%r2) ; rxsbg %r1, %r3, 160, 40, 24 ; jglh 1f ; risbgn %r1, %r4, 32, 40, 24 ; rll %r1, %r1, 0(%r12) ; cs %r0, %r1, 0(%r5) ; jglh 0b ; 
1: +; rll %r2, %r0, 8(%r2) +; lmg %r12, %r15, 96(%r15) +; br %r14 diff --git a/cranelift/filetests/filetests/isa/s390x/atomic_load-little.clif b/cranelift/filetests/filetests/isa/s390x/atomic_load-little.clif index 5556176bbb..fa493bcdd0 100644 --- a/cranelift/filetests/filetests/isa/s390x/atomic_load-little.clif +++ b/cranelift/filetests/filetests/isa/s390x/atomic_load-little.clif @@ -1,4 +1,4 @@ -test compile +test compile precise-output target s390x function %atomic_load_i64(i64) -> i64 { @@ -7,8 +7,9 @@ block0(v0: i64): return v1 } -; check: lrvg %r2, 0(%r2) -; nextln: br %r14 +; block0: +; lrvg %r2, 0(%r2) +; br %r14 function %atomic_load_i64_sym() -> i64 { gv0 = symbol colocated %sym @@ -18,8 +19,9 @@ block0: return v1 } -; check: larl %r1, %sym + 0 ; lrvg %r2, 0(%r1) -; nextln: br %r14 +; block0: +; larl %r1, %sym + 0 ; lrvg %r2, 0(%r1) +; br %r14 function %atomic_load_i32(i64) -> i32 { block0(v0: i64): @@ -27,8 +29,9 @@ block0(v0: i64): return v1 } -; check: lrv %r2, 0(%r2) -; nextln: br %r14 +; block0: +; lrv %r2, 0(%r2) +; br %r14 function %atomic_load_i32_sym() -> i32 { gv0 = symbol colocated %sym @@ -38,8 +41,9 @@ block0: return v1 } -; check: larl %r1, %sym + 0 ; lrv %r2, 0(%r1) -; nextln: br %r14 +; block0: +; larl %r1, %sym + 0 ; lrv %r2, 0(%r1) +; br %r14 function %atomic_load_i16(i64) -> i16 { block0(v0: i64): @@ -47,8 +51,9 @@ block0(v0: i64): return v1 } -; check: lrvh %r2, 0(%r2) -; nextln: br %r14 +; block0: +; lrvh %r2, 0(%r2) +; br %r14 function %atomic_load_i16_sym() -> i16 { gv0 = symbol colocated %sym @@ -58,8 +63,9 @@ block0: return v1 } -; check: larl %r1, %sym + 0 ; lrvh %r2, 0(%r1) -; nextln: br %r14 +; block0: +; larl %r1, %sym + 0 ; lrvh %r2, 0(%r1) +; br %r14 function %atomic_load_i8(i64) -> i8 { block0(v0: i64): @@ -67,6 +73,7 @@ block0(v0: i64): return v1 } -; check: llc %r2, 0(%r2) -; nextln: br %r14 +; block0: +; llc %r2, 0(%r2) +; br %r14 diff --git a/cranelift/filetests/filetests/isa/s390x/atomic_load.clif b/cranelift/filetests/filetests/isa/s390x/atomic_load.clif index b361aaa4c7..673577633b 100644 --- a/cranelift/filetests/filetests/isa/s390x/atomic_load.clif +++ b/cranelift/filetests/filetests/isa/s390x/atomic_load.clif @@ -1,4 +1,4 @@ -test compile +test compile precise-output target s390x function %atomic_load_i64(i64) -> i64 { @@ -7,8 +7,9 @@ block0(v0: i64): return v1 } -; check: lg %r2, 0(%r2) -; nextln: br %r14 +; block0: +; lg %r2, 0(%r2) +; br %r14 function %atomic_load_i64_sym() -> i64 { gv0 = symbol colocated %sym @@ -18,8 +19,9 @@ block0: return v1 } -; check: lgrl %r2, %sym + 0 -; nextln: br %r14 +; block0: +; lgrl %r2, %sym + 0 +; br %r14 function %atomic_load_i32(i64) -> i32 { block0(v0: i64): @@ -27,8 +29,9 @@ block0(v0: i64): return v1 } -; check: l %r2, 0(%r2) -; nextln: br %r14 +; block0: +; l %r2, 0(%r2) +; br %r14 function %atomic_load_i32_sym() -> i32 { gv0 = symbol colocated %sym @@ -38,8 +41,9 @@ block0: return v1 } -; check: lrl %r2, %sym + 0 -; nextln: br %r14 +; block0: +; lrl %r2, %sym + 0 +; br %r14 function %atomic_load_i16(i64) -> i16 { block0(v0: i64): @@ -47,8 +51,9 @@ block0(v0: i64): return v1 } -; check: llh %r2, 0(%r2) -; nextln: br %r14 +; block0: +; llh %r2, 0(%r2) +; br %r14 function %atomic_load_i16_sym() -> i16 { gv0 = symbol colocated %sym @@ -58,8 +63,9 @@ block0: return v1 } -; check: llhrl %r2, %sym + 0 -; nextln: br %r14 +; block0: +; llhrl %r2, %sym + 0 +; br %r14 function %atomic_load_i8(i64) -> i8 { block0(v0: i64): @@ -67,6 +73,7 @@ block0(v0: i64): return v1 } -; check: llc %r2, 0(%r2) -; nextln: 
br %r14 +; block0: +; llc %r2, 0(%r2) +; br %r14 diff --git a/cranelift/filetests/filetests/isa/s390x/atomic_rmw-arch13.clif b/cranelift/filetests/filetests/isa/s390x/atomic_rmw-arch13.clif index 28cf8be01e..b23455eee1 100644 --- a/cranelift/filetests/filetests/isa/s390x/atomic_rmw-arch13.clif +++ b/cranelift/filetests/filetests/isa/s390x/atomic_rmw-arch13.clif @@ -1,4 +1,4 @@ -test compile +test compile precise-output target s390x arch13 function %atomic_rmw_nand_i64(i64, i64, i64) -> i64 { @@ -7,10 +7,11 @@ block0(v0: i64, v1: i64, v2: i64): return v3 } -; check: lg %r0, 0(%r3) -; nextln: 0: nngrk %r1, %r0, %r4 ; csg %r0, %r1, 0(%r3) ; jglh 0b ; 1: -; nextln: lgr %r2, %r0 -; nextln: br %r14 +; block0: +; lg %r0, 0(%r3) +; 0: nngrk %r1, %r0, %r4 ; csg %r0, %r1, 0(%r3) ; jglh 0b ; 1: +; lgr %r2, %r0 +; br %r14 function %atomic_rmw_nand_i32(i64, i64, i32) -> i32 { block0(v0: i64, v1: i64, v2: i32): @@ -18,10 +19,11 @@ block0(v0: i64, v1: i64, v2: i32): return v3 } -; check: l %r0, 0(%r3) -; nextln: 0: nnrk %r1, %r0, %r4 ; cs %r0, %r1, 0(%r3) ; jglh 0b ; 1: -; nextln: lr %r2, %r0 -; nextln: br %r14 +; block0: +; l %r0, 0(%r3) +; 0: nnrk %r1, %r0, %r4 ; cs %r0, %r1, 0(%r3) ; jglh 0b ; 1: +; lgr %r2, %r0 +; br %r14 function %atomic_rmw_nand_i16(i64, i64, i16) -> i16 { block0(v0: i64, v1: i64, v2: i16): @@ -29,12 +31,13 @@ block0(v0: i64, v1: i64, v2: i16): return v3 } -; check: sllk %r2, %r3, 3 -; nextln: nill %r3, 65532 -; nextln: l %r0, 0(%r3) -; nextln: 0: rll %r1, %r0, 0(%r2) ; rnsbg %r1, %r4, 32, 48, 16 ; xilf %r1, 4294901760 ; rll %r1, %r1, 0(%r2) ; cs %r0, %r1, 0(%r3) ; jglh 0b ; 1: -; nextln: rll %r2, %r0, 16(%r2) -; nextln: br %r14 +; block0: +; sllk %r5, %r3, 3 +; nill %r3, 65532 +; l %r0, 0(%r3) +; 0: rll %r1, %r0, 0(%r5) ; rnsbg %r1, %r4, 32, 48, 16 ; xilf %r1, 4294901760 ; rll %r1, %r1, 0(%r5) ; cs %r0, %r1, 0(%r3) ; jglh 0b ; 1: +; rll %r2, %r0, 16(%r5) +; br %r14 function %atomic_rmw_nand_i8(i64, i64, i8) -> i8 { block0(v0: i64, v1: i64, v2: i8): @@ -42,13 +45,14 @@ block0(v0: i64, v1: i64, v2: i8): return v3 } -; check: sllk %r2, %r3, 3 -; nextln: nill %r3, 65532 -; nextln: lcr %r5, %r2 -; nextln: l %r0, 0(%r3) -; nextln: 0: rll %r1, %r0, 0(%r2) ; rnsbg %r1, %r4, 32, 40, 24 ; xilf %r1, 4278190080 ; rll %r1, %r1, 0(%r5) ; cs %r0, %r1, 0(%r3) ; jglh 0b ; 1: -; nextln: rll %r2, %r0, 8(%r2) -; nextln: br %r14 +; block0: +; sllk %r5, %r3, 3 +; nill %r3, 65532 +; lcr %r2, %r5 +; l %r0, 0(%r3) +; 0: rll %r1, %r0, 0(%r5) ; rnsbg %r1, %r4, 32, 40, 24 ; xilf %r1, 4278190080 ; rll %r1, %r1, 0(%r2) ; cs %r0, %r1, 0(%r3) ; jglh 0b ; 1: +; rll %r2, %r0, 8(%r5) +; br %r14 function %atomic_rmw_nand_i64(i64, i64, i64) -> i64 { block0(v0: i64, v1: i64, v2: i64): @@ -56,11 +60,12 @@ block0(v0: i64, v1: i64, v2: i64): return v3 } -; check: lrvgr %r2, %r4 -; nextln: lg %r0, 0(%r3) -; nextln: 0: nngrk %r1, %r0, %r2 ; csg %r0, %r1, 0(%r3) ; jglh 0b ; 1: -; nextln: lrvgr %r2, %r0 -; nextln: br %r14 +; block0: +; lrvgr %r5, %r4 +; lg %r0, 0(%r3) +; 0: nngrk %r1, %r0, %r5 ; csg %r0, %r1, 0(%r3) ; jglh 0b ; 1: +; lrvgr %r2, %r0 +; br %r14 function %atomic_rmw_nand_i32(i64, i64, i32) -> i32 { block0(v0: i64, v1: i64, v2: i32): @@ -68,11 +73,12 @@ block0(v0: i64, v1: i64, v2: i32): return v3 } -; check: lrvr %r2, %r4 -; nextln: l %r0, 0(%r3) -; nextln: 0: nnrk %r1, %r0, %r2 ; cs %r0, %r1, 0(%r3) ; jglh 0b ; 1: -; nextln: lrvr %r2, %r0 -; nextln: br %r14 +; block0: +; lrvr %r5, %r4 +; l %r0, 0(%r3) +; 0: nnrk %r1, %r0, %r5 ; cs %r0, %r1, 0(%r3) ; jglh 0b ; 1: +; lrvr %r2, %r0 +; br %r14 function 
%atomic_rmw_nand_i16(i64, i64, i16) -> i16 { block0(v0: i64, v1: i64, v2: i16): @@ -80,14 +86,15 @@ block0(v0: i64, v1: i64, v2: i16): return v3 } -; check: sllk %r2, %r3, 3 -; nextln: nill %r3, 65532 -; nextln: lrvr %r4, %r4 -; nextln: l %r0, 0(%r3) -; nextln: 0: rll %r1, %r0, 16(%r2) ; rnsbg %r1, %r4, 48, 64, 48 ; xilf %r1, 65535 ; rll %r1, %r1, 16(%r2) ; cs %r0, %r1, 0(%r3) ; jglh 0b ; 1: -; nextln: rll %r2, %r0, 0(%r2) -; nextln: lrvr %r2, %r2 -; nextln: br %r14 +; block0: +; sllk %r5, %r3, 3 +; nill %r3, 65532 +; lrvr %r2, %r4 +; l %r0, 0(%r3) +; 0: rll %r1, %r0, 16(%r5) ; rnsbg %r1, %r2, 48, 64, 48 ; xilf %r1, 65535 ; rll %r1, %r1, 16(%r5) ; cs %r0, %r1, 0(%r3) ; jglh 0b ; 1: +; rll %r2, %r0, 0(%r5) +; lrvr %r2, %r2 +; br %r14 function %atomic_rmw_nand_i8(i64, i64, i8) -> i8 { block0(v0: i64, v1: i64, v2: i8): @@ -95,11 +102,12 @@ block0(v0: i64, v1: i64, v2: i8): return v3 } -; check: sllk %r2, %r3, 3 -; nextln: nill %r3, 65532 -; nextln: lcr %r5, %r2 -; nextln: l %r0, 0(%r3) -; nextln: 0: rll %r1, %r0, 0(%r2) ; rnsbg %r1, %r4, 32, 40, 24 ; xilf %r1, 4278190080 ; rll %r1, %r1, 0(%r5) ; cs %r0, %r1, 0(%r3) ; jglh 0b ; 1: -; nextln: rll %r2, %r0, 8(%r2) -; nextln: br %r14 +; block0: +; sllk %r5, %r3, 3 +; nill %r3, 65532 +; lcr %r2, %r5 +; l %r0, 0(%r3) +; 0: rll %r1, %r0, 0(%r5) ; rnsbg %r1, %r4, 32, 40, 24 ; xilf %r1, 4278190080 ; rll %r1, %r1, 0(%r2) ; cs %r0, %r1, 0(%r3) ; jglh 0b ; 1: +; rll %r2, %r0, 8(%r5) +; br %r14 diff --git a/cranelift/filetests/filetests/isa/s390x/atomic_rmw-little.clif b/cranelift/filetests/filetests/isa/s390x/atomic_rmw-little.clif index 783c70a9cd..479e25e734 100644 --- a/cranelift/filetests/filetests/isa/s390x/atomic_rmw-little.clif +++ b/cranelift/filetests/filetests/isa/s390x/atomic_rmw-little.clif @@ -1,4 +1,4 @@ -test compile +test compile precise-output target s390x ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; @@ -11,11 +11,12 @@ block0(v0: i64, v1: i64, v2: i64): return v3 } -; check: lrvgr %r2, %r4 -; nextln: lg %r0, 0(%r3) -; nextln: 0: csg %r0, %r2, 0(%r3) ; jglh 0b ; 1: -; nextln: lrvgr %r2, %r0 -; nextln: br %r14 +; block0: +; lrvgr %r5, %r4 +; lg %r0, 0(%r3) +; 0: csg %r0, %r5, 0(%r3) ; jglh 0b ; 1: +; lrvgr %r2, %r0 +; br %r14 function %atomic_rmw_xchg_i32(i64, i64, i32) -> i32 { block0(v0: i64, v1: i64, v2: i32): @@ -23,11 +24,12 @@ block0(v0: i64, v1: i64, v2: i32): return v3 } -; check: lrvr %r2, %r4 -; nextln: l %r0, 0(%r3) -; nextln: 0: cs %r0, %r2, 0(%r3) ; jglh 0b ; 1: -; nextln: lrvr %r2, %r0 -; nextln: br %r14 +; block0: +; lrvr %r5, %r4 +; l %r0, 0(%r3) +; 0: cs %r0, %r5, 0(%r3) ; jglh 0b ; 1: +; lrvr %r2, %r0 +; br %r14 function %atomic_rmw_xchg_i16(i64, i64, i16) -> i16 { block0(v0: i64, v1: i64, v2: i16): @@ -35,14 +37,15 @@ block0(v0: i64, v1: i64, v2: i16): return v3 } -; check: sllk %r2, %r3, 3 -; nextln: nill %r3, 65532 -; nextln: lrvr %r4, %r4 -; nextln: l %r0, 0(%r3) -; nextln: 0: rll %r1, %r0, 16(%r2) ; risbgn %r1, %r4, 48, 64, 48 ; rll %r1, %r1, 16(%r2) ; cs %r0, %r1, 0(%r3) ; jglh 0b ; 1: -; nextln: rll %r2, %r0, 0(%r2) -; nextln: lrvr %r2, %r2 -; nextln: br %r14 +; block0: +; sllk %r5, %r3, 3 +; nill %r3, 65532 +; lrvr %r2, %r4 +; l %r0, 0(%r3) +; 0: rll %r1, %r0, 16(%r5) ; risbgn %r1, %r2, 48, 64, 48 ; rll %r1, %r1, 16(%r5) ; cs %r0, %r1, 0(%r3) ; jglh 0b ; 1: +; rll %r2, %r0, 0(%r5) +; lrvr %r2, %r2 +; br %r14 function %atomic_rmw_xchg_i8(i64, i64, i8) -> i8 { block0(v0: i64, v1: i64, v2: i8): @@ -50,18 +53,14 @@ block0(v0: i64, v1: i64, v2: i8): return v3 } -; check: sllk %r2, %r3, 3 -; nextln: nill %r3, 65532 -; 
nextln: lcr %r5, %r2 -; nextln: l %r0, 0(%r3) -; nextln: 0: rll %r1, %r0, 0(%r2) ; risbgn %r1, %r4, 32, 40, 24 ; rll %r1, %r1, 0(%r5) ; cs %r0, %r1, 0(%r3) ; jglh 0b ; 1: -; nextln: rll %r2, %r0, 8(%r2) -; nextln: br %r14 - - -;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; -;; ATOMIC_RMW (ADD) -;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +; block0: +; sllk %r5, %r3, 3 +; nill %r3, 65532 +; lcr %r2, %r5 +; l %r0, 0(%r3) +; 0: rll %r1, %r0, 0(%r5) ; risbgn %r1, %r4, 32, 40, 24 ; rll %r1, %r1, 0(%r2) ; cs %r0, %r1, 0(%r3) ; jglh 0b ; 1: +; rll %r2, %r0, 8(%r5) +; br %r14 function %atomic_rmw_add_i64(i64, i64, i64) -> i64 { block0(v0: i64, v1: i64, v2: i64): @@ -69,10 +68,11 @@ block0(v0: i64, v1: i64, v2: i64): return v3 } -; check: lg %r0, 0(%r3) -; nextln: 0: lrvgr %r1, %r0 ; agr %r1, %r4 ; lrvgr %r1, %r1 ; csg %r0, %r1, 0(%r3) ; jglh 0b ; 1: -; nextln: lrvgr %r2, %r0 -; nextln: br %r14 +; block0: +; lg %r0, 0(%r3) +; 0: lrvgr %r1, %r0 ; agr %r1, %r4 ; lrvgr %r1, %r1 ; csg %r0, %r1, 0(%r3) ; jglh 0b ; 1: +; lrvgr %r2, %r0 +; br %r14 function %atomic_rmw_add_i32(i64, i64, i32) -> i32 { block0(v0: i64, v1: i64, v2: i32): @@ -80,10 +80,11 @@ block0(v0: i64, v1: i64, v2: i32): return v3 } -; check: l %r0, 0(%r3) -; nextln: 0: lrvr %r1, %r0 ; ar %r1, %r4 ; lrvr %r1, %r1 ; cs %r0, %r1, 0(%r3) ; jglh 0b ; 1: -; nextln: lrvr %r2, %r0 -; nextln: br %r14 +; block0: +; l %r0, 0(%r3) +; 0: lrvr %r1, %r0 ; ar %r1, %r4 ; lrvr %r1, %r1 ; cs %r0, %r1, 0(%r3) ; jglh 0b ; 1: +; lrvr %r2, %r0 +; br %r14 function %atomic_rmw_add_i16(i64, i64, i16) -> i16 { block0(v0: i64, v1: i64, v2: i16): @@ -91,14 +92,15 @@ block0(v0: i64, v1: i64, v2: i16): return v3 } -; check: sllk %r2, %r3, 3 -; nextln: nill %r3, 65532 -; nextln: sllk %r4, %r4, 16 -; nextln: l %r0, 0(%r3) -; nextln: 0: rll %r1, %r0, 16(%r2) ; lrvr %r1, %r1 ; ar %r1, %r4 ; lrvr %r1, %r1 ; rll %r1, %r1, 16(%r2) ; cs %r0, %r1, 0(%r3) ; jglh 0b ; 1: -; nextln: rll %r2, %r0, 0(%r2) -; nextln: lrvr %r2, %r2 -; nextln: br %r14 +; block0: +; sllk %r5, %r3, 3 +; nill %r3, 65532 +; sllk %r2, %r4, 16 +; l %r0, 0(%r3) +; 0: rll %r1, %r0, 16(%r5) ; lrvr %r1, %r1 ; ar %r1, %r2 ; lrvr %r1, %r1 ; rll %r1, %r1, 16(%r5) ; cs %r0, %r1, 0(%r3) ; jglh 0b ; 1: +; rll %r2, %r0, 0(%r5) +; lrvr %r2, %r2 +; br %r14 function %atomic_rmw_add_i8(i64, i64, i8) -> i8 { block0(v0: i64, v1: i64, v2: i8): @@ -106,18 +108,15 @@ block0(v0: i64, v1: i64, v2: i8): return v3 } -; check: sllk %r2, %r3, 3 -; nextln: nill %r3, 65532 -; nextln: sllk %r4, %r4, 24 -; nextln: lcr %r5, %r2 -; nextln: l %r0, 0(%r3) -; nextln: 0: rll %r1, %r0, 0(%r2) ; ar %r1, %r4 ; rll %r1, %r1, 0(%r5) ; cs %r0, %r1, 0(%r3) ; jglh 0b ; 1: -; nextln: rll %r2, %r0, 8(%r2) -; nextln: br %r14 - -;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; -;; ATOMIC_RMW (SUB) -;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +; block0: +; sllk %r5, %r3, 3 +; nill %r3, 65532 +; sllk %r2, %r4, 24 +; lcr %r4, %r5 +; l %r0, 0(%r3) +; 0: rll %r1, %r0, 0(%r5) ; ar %r1, %r2 ; rll %r1, %r1, 0(%r4) ; cs %r0, %r1, 0(%r3) ; jglh 0b ; 1: +; rll %r2, %r0, 8(%r5) +; br %r14 function %atomic_rmw_sub_i64(i64, i64, i64) -> i64 { block0(v0: i64, v1: i64, v2: i64): @@ -125,10 +124,11 @@ block0(v0: i64, v1: i64, v2: i64): return v3 } -; check: lg %r0, 0(%r3) -; nextln: 0: lrvgr %r1, %r0 ; sgr %r1, %r4 ; lrvgr %r1, %r1 ; csg %r0, %r1, 0(%r3) ; jglh 0b ; 1: -; nextln: lrvgr %r2, %r0 -; nextln: br %r14 +; block0: +; lg %r0, 0(%r3) +; 0: lrvgr %r1, %r0 ; sgr %r1, %r4 ; lrvgr %r1, %r1 ; csg %r0, %r1, 0(%r3) ; jglh 0b ; 1: +; lrvgr %r2, %r0 +; br %r14 function 
%atomic_rmw_sub_i32(i64, i64, i32) -> i32 { block0(v0: i64, v1: i64, v2: i32): @@ -136,10 +136,11 @@ block0(v0: i64, v1: i64, v2: i32): return v3 } -; check: l %r0, 0(%r3) -; nextln: 0: lrvr %r1, %r0 ; sr %r1, %r4 ; lrvr %r1, %r1 ; cs %r0, %r1, 0(%r3) ; jglh 0b ; 1: -; nextln: lrvr %r2, %r0 -; nextln: br %r14 +; block0: +; l %r0, 0(%r3) +; 0: lrvr %r1, %r0 ; sr %r1, %r4 ; lrvr %r1, %r1 ; cs %r0, %r1, 0(%r3) ; jglh 0b ; 1: +; lrvr %r2, %r0 +; br %r14 function %atomic_rmw_sub_i16(i64, i64, i16) -> i16 { block0(v0: i64, v1: i64, v2: i16): @@ -147,14 +148,15 @@ block0(v0: i64, v1: i64, v2: i16): return v3 } -; check: sllk %r2, %r3, 3 -; nextln: nill %r3, 65532 -; nextln: sllk %r4, %r4, 16 -; nextln: l %r0, 0(%r3) -; nextln: 0: rll %r1, %r0, 16(%r2) ; lrvr %r1, %r1 ; sr %r1, %r4 ; lrvr %r1, %r1 ; rll %r1, %r1, 16(%r2) ; cs %r0, %r1, 0(%r3) ; jglh 0b ; 1: -; nextln: rll %r2, %r0, 0(%r2) -; nextln: lrvr %r2, %r2 -; nextln: br %r14 +; block0: +; sllk %r5, %r3, 3 +; nill %r3, 65532 +; sllk %r2, %r4, 16 +; l %r0, 0(%r3) +; 0: rll %r1, %r0, 16(%r5) ; lrvr %r1, %r1 ; sr %r1, %r2 ; lrvr %r1, %r1 ; rll %r1, %r1, 16(%r5) ; cs %r0, %r1, 0(%r3) ; jglh 0b ; 1: +; rll %r2, %r0, 0(%r5) +; lrvr %r2, %r2 +; br %r14 function %atomic_rmw_sub_i8(i64, i64, i8) -> i8 { block0(v0: i64, v1: i64, v2: i8): @@ -162,18 +164,15 @@ block0(v0: i64, v1: i64, v2: i8): return v3 } -; check: sllk %r2, %r3, 3 -; nextln: nill %r3, 65532 -; nextln: sllk %r4, %r4, 24 -; nextln: lcr %r5, %r2 -; nextln: l %r0, 0(%r3) -; nextln: 0: rll %r1, %r0, 0(%r2) ; sr %r1, %r4 ; rll %r1, %r1, 0(%r5) ; cs %r0, %r1, 0(%r3) ; jglh 0b ; 1: -; nextln: rll %r2, %r0, 8(%r2) -; nextln: br %r14 - -;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; -;; ATOMIC_RMW (AND) -;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +; block0: +; sllk %r5, %r3, 3 +; nill %r3, 65532 +; sllk %r2, %r4, 24 +; lcr %r4, %r5 +; l %r0, 0(%r3) +; 0: rll %r1, %r0, 0(%r5) ; sr %r1, %r2 ; rll %r1, %r1, 0(%r4) ; cs %r0, %r1, 0(%r3) ; jglh 0b ; 1: +; rll %r2, %r0, 8(%r5) +; br %r14 function %atomic_rmw_and_i64(i64, i64, i64) -> i64 { block0(v0: i64, v1: i64, v2: i64): @@ -181,10 +180,11 @@ block0(v0: i64, v1: i64, v2: i64): return v3 } -; check: lrvgr %r2, %r4 -; nextln: lang %r2, %r2, 0(%r3) -; nextln: lrvgr %r2, %r2 -; nextln: br %r14 +; block0: +; lrvgr %r5, %r4 +; lang %r3, %r5, 0(%r3) +; lrvgr %r2, %r3 +; br %r14 function %atomic_rmw_and_i32(i64, i64, i32) -> i32 { block0(v0: i64, v1: i64, v2: i32): @@ -192,10 +192,11 @@ block0(v0: i64, v1: i64, v2: i32): return v3 } -; check: lrvr %r2, %r4 -; nextln: lan %r2, %r2, 0(%r3) -; nextln: lrvr %r2, %r2 -; nextln: br %r14 +; block0: +; lrvr %r5, %r4 +; lan %r3, %r5, 0(%r3) +; lrvr %r2, %r3 +; br %r14 function %atomic_rmw_and_i16(i64, i64, i16) -> i16 { block0(v0: i64, v1: i64, v2: i16): @@ -203,14 +204,15 @@ block0(v0: i64, v1: i64, v2: i16): return v3 } -; check: sllk %r2, %r3, 3 -; nextln: nill %r3, 65532 -; nextln: lrvr %r4, %r4 -; nextln: l %r0, 0(%r3) -; nextln: 0: rll %r1, %r0, 16(%r2) ; rnsbg %r1, %r4, 48, 64, 48 ; rll %r1, %r1, 16(%r2) ; cs %r0, %r1, 0(%r3) ; jglh 0b ; 1: -; nextln: rll %r2, %r0, 0(%r2) -; nextln: lrvr %r2, %r2 -; nextln: br %r14 +; block0: +; sllk %r5, %r3, 3 +; nill %r3, 65532 +; lrvr %r2, %r4 +; l %r0, 0(%r3) +; 0: rll %r1, %r0, 16(%r5) ; rnsbg %r1, %r2, 48, 64, 48 ; rll %r1, %r1, 16(%r5) ; cs %r0, %r1, 0(%r3) ; jglh 0b ; 1: +; rll %r2, %r0, 0(%r5) +; lrvr %r2, %r2 +; br %r14 function %atomic_rmw_and_i8(i64, i64, i8) -> i8 { block0(v0: i64, v1: i64, v2: i8): @@ -218,17 +220,14 @@ block0(v0: i64, v1: i64, v2: i8): return 
v3 } -; check: sllk %r2, %r3, 3 -; nextln: nill %r3, 65532 -; nextln: lcr %r5, %r2 -; nextln: l %r0, 0(%r3) -; nextln: 0: rll %r1, %r0, 0(%r2) ; rnsbg %r1, %r4, 32, 40, 24 ; rll %r1, %r1, 0(%r5) ; cs %r0, %r1, 0(%r3) ; jglh 0b ; 1: -; nextln: rll %r2, %r0, 8(%r2) -; nextln: br %r14 - -;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; -;; ATOMIC_RMW (OR) -;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +; block0: +; sllk %r5, %r3, 3 +; nill %r3, 65532 +; lcr %r2, %r5 +; l %r0, 0(%r3) +; 0: rll %r1, %r0, 0(%r5) ; rnsbg %r1, %r4, 32, 40, 24 ; rll %r1, %r1, 0(%r2) ; cs %r0, %r1, 0(%r3) ; jglh 0b ; 1: +; rll %r2, %r0, 8(%r5) +; br %r14 function %atomic_rmw_or_i64(i64, i64, i64) -> i64 { block0(v0: i64, v1: i64, v2: i64): @@ -236,10 +235,11 @@ block0(v0: i64, v1: i64, v2: i64): return v3 } -; check: lrvgr %r2, %r4 -; nextln: laog %r2, %r2, 0(%r3) -; nextln: lrvgr %r2, %r2 -; nextln: br %r14 +; block0: +; lrvgr %r5, %r4 +; laog %r3, %r5, 0(%r3) +; lrvgr %r2, %r3 +; br %r14 function %atomic_rmw_or_i32(i64, i64, i32) -> i32 { block0(v0: i64, v1: i64, v2: i32): @@ -247,10 +247,11 @@ block0(v0: i64, v1: i64, v2: i32): return v3 } -; check: lrvr %r2, %r4 -; nextln: lao %r2, %r2, 0(%r3) -; nextln: lrvr %r2, %r2 -; nextln: br %r14 +; block0: +; lrvr %r5, %r4 +; lao %r3, %r5, 0(%r3) +; lrvr %r2, %r3 +; br %r14 function %atomic_rmw_or_i16(i64, i64, i16) -> i16 { block0(v0: i64, v1: i64, v2: i16): @@ -258,14 +259,15 @@ block0(v0: i64, v1: i64, v2: i16): return v3 } -; check: sllk %r2, %r3, 3 -; nextln: nill %r3, 65532 -; nextln: lrvr %r4, %r4 -; nextln: l %r0, 0(%r3) -; nextln: 0: rll %r1, %r0, 16(%r2) ; rosbg %r1, %r4, 48, 64, 48 ; rll %r1, %r1, 16(%r2) ; cs %r0, %r1, 0(%r3) ; jglh 0b ; 1: -; nextln: rll %r2, %r0, 0(%r2) -; nextln: lrvr %r2, %r2 -; nextln: br %r14 +; block0: +; sllk %r5, %r3, 3 +; nill %r3, 65532 +; lrvr %r2, %r4 +; l %r0, 0(%r3) +; 0: rll %r1, %r0, 16(%r5) ; rosbg %r1, %r2, 48, 64, 48 ; rll %r1, %r1, 16(%r5) ; cs %r0, %r1, 0(%r3) ; jglh 0b ; 1: +; rll %r2, %r0, 0(%r5) +; lrvr %r2, %r2 +; br %r14 function %atomic_rmw_or_i8(i64, i64, i8) -> i8 { block0(v0: i64, v1: i64, v2: i8): @@ -273,18 +275,14 @@ block0(v0: i64, v1: i64, v2: i8): return v3 } -; check: sllk %r2, %r3, 3 -; nextln: nill %r3, 65532 -; nextln: lcr %r5, %r2 -; nextln: l %r0, 0(%r3) -; nextln: 0: rll %r1, %r0, 0(%r2) ; rosbg %r1, %r4, 32, 40, 24 ; rll %r1, %r1, 0(%r5) ; cs %r0, %r1, 0(%r3) ; jglh 0b ; 1: -; nextln: rll %r2, %r0, 8(%r2) -; nextln: br %r14 - - -;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; -;; ATOMIC_RMW (XOR) -;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +; block0: +; sllk %r5, %r3, 3 +; nill %r3, 65532 +; lcr %r2, %r5 +; l %r0, 0(%r3) +; 0: rll %r1, %r0, 0(%r5) ; rosbg %r1, %r4, 32, 40, 24 ; rll %r1, %r1, 0(%r2) ; cs %r0, %r1, 0(%r3) ; jglh 0b ; 1: +; rll %r2, %r0, 8(%r5) +; br %r14 function %atomic_rmw_xor_i64(i64, i64, i64) -> i64 { block0(v0: i64, v1: i64, v2: i64): @@ -292,10 +290,11 @@ block0(v0: i64, v1: i64, v2: i64): return v3 } -; check: lrvgr %r2, %r4 -; nextln: laxg %r2, %r2, 0(%r3) -; nextln: lrvgr %r2, %r2 -; nextln: br %r14 +; block0: +; lrvgr %r5, %r4 +; laxg %r3, %r5, 0(%r3) +; lrvgr %r2, %r3 +; br %r14 function %atomic_rmw_xor_i32(i64, i64, i32) -> i32 { block0(v0: i64, v1: i64, v2: i32): @@ -303,10 +302,11 @@ block0(v0: i64, v1: i64, v2: i32): return v3 } -; check: lrvr %r2, %r4 -; nextln: lax %r2, %r2, 0(%r3) -; nextln: lrvr %r2, %r2 -; nextln: br %r14 +; block0: +; lrvr %r5, %r4 +; lax %r3, %r5, 0(%r3) +; lrvr %r2, %r3 +; br %r14 function %atomic_rmw_xor_i16(i64, i64, i16) -> i16 { block0(v0: i64, v1: 
i64, v2: i16): @@ -314,14 +314,15 @@ block0(v0: i64, v1: i64, v2: i16): return v3 } -; check: sllk %r2, %r3, 3 -; nextln: nill %r3, 65532 -; nextln: lrvr %r4, %r4 -; nextln: l %r0, 0(%r3) -; nextln: 0: rll %r1, %r0, 16(%r2) ; rxsbg %r1, %r4, 48, 64, 48 ; rll %r1, %r1, 16(%r2) ; cs %r0, %r1, 0(%r3) ; jglh 0b ; 1: -; nextln: rll %r2, %r0, 0(%r2) -; nextln: lrvr %r2, %r2 -; nextln: br %r14 +; block0: +; sllk %r5, %r3, 3 +; nill %r3, 65532 +; lrvr %r2, %r4 +; l %r0, 0(%r3) +; 0: rll %r1, %r0, 16(%r5) ; rxsbg %r1, %r2, 48, 64, 48 ; rll %r1, %r1, 16(%r5) ; cs %r0, %r1, 0(%r3) ; jglh 0b ; 1: +; rll %r2, %r0, 0(%r5) +; lrvr %r2, %r2 +; br %r14 function %atomic_rmw_xor_i8(i64, i64, i8) -> i8 { block0(v0: i64, v1: i64, v2: i8): @@ -329,17 +330,14 @@ block0(v0: i64, v1: i64, v2: i8): return v3 } -; check: sllk %r2, %r3, 3 -; nextln: nill %r3, 65532 -; nextln: lcr %r5, %r2 -; nextln: l %r0, 0(%r3) -; nextln: 0: rll %r1, %r0, 0(%r2) ; rxsbg %r1, %r4, 32, 40, 24 ; rll %r1, %r1, 0(%r5) ; cs %r0, %r1, 0(%r3) ; jglh 0b ; 1: -; nextln: rll %r2, %r0, 8(%r2) -; nextln: br %r14 - -;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; -;; ATOMIC_RMW (NAND) -;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +; block0: +; sllk %r5, %r3, 3 +; nill %r3, 65532 +; lcr %r2, %r5 +; l %r0, 0(%r3) +; 0: rll %r1, %r0, 0(%r5) ; rxsbg %r1, %r4, 32, 40, 24 ; rll %r1, %r1, 0(%r2) ; cs %r0, %r1, 0(%r3) ; jglh 0b ; 1: +; rll %r2, %r0, 8(%r5) +; br %r14 function %atomic_rmw_nand_i64(i64, i64, i64) -> i64 { block0(v0: i64, v1: i64, v2: i64): @@ -347,11 +345,12 @@ block0(v0: i64, v1: i64, v2: i64): return v3 } -; check: lrvgr %r2, %r4 -; nextln: lg %r0, 0(%r3) -; nextln: 0: ngrk %r1, %r0, %r2 ; xilf %r1, 4294967295 ; xihf %r1, 4294967295 ; csg %r0, %r1, 0(%r3) ; jglh 0b ; 1: -; nextln: lrvgr %r2, %r0 -; nextln: br %r14 +; block0: +; lrvgr %r5, %r4 +; lg %r0, 0(%r3) +; 0: ngrk %r1, %r0, %r5 ; xilf %r1, 4294967295 ; xihf %r1, 4294967295 ; csg %r0, %r1, 0(%r3) ; jglh 0b ; 1: +; lrvgr %r2, %r0 +; br %r14 function %atomic_rmw_nand_i32(i64, i64, i32) -> i32 { block0(v0: i64, v1: i64, v2: i32): @@ -359,11 +358,12 @@ block0(v0: i64, v1: i64, v2: i32): return v3 } -; check: lrvr %r2, %r4 -; nextln: l %r0, 0(%r3) -; nextln: 0: nrk %r1, %r0, %r2 ; xilf %r1, 4294967295 ; cs %r0, %r1, 0(%r3) ; jglh 0b ; 1: -; nextln: lrvr %r2, %r0 -; nextln: br %r14 +; block0: +; lrvr %r5, %r4 +; l %r0, 0(%r3) +; 0: nrk %r1, %r0, %r5 ; xilf %r1, 4294967295 ; cs %r0, %r1, 0(%r3) ; jglh 0b ; 1: +; lrvr %r2, %r0 +; br %r14 function %atomic_rmw_nand_i16(i64, i64, i16) -> i16 { block0(v0: i64, v1: i64, v2: i16): @@ -371,14 +371,15 @@ block0(v0: i64, v1: i64, v2: i16): return v3 } -; check: sllk %r2, %r3, 3 -; nextln: nill %r3, 65532 -; nextln: lrvr %r4, %r4 -; nextln: l %r0, 0(%r3) -; nextln: 0: rll %r1, %r0, 16(%r2) ; rnsbg %r1, %r4, 48, 64, 48 ; xilf %r1, 65535 ; rll %r1, %r1, 16(%r2) ; cs %r0, %r1, 0(%r3) ; jglh 0b ; 1: -; nextln: rll %r2, %r0, 0(%r2) -; nextln: lrvr %r2, %r2 -; nextln: br %r14 +; block0: +; sllk %r5, %r3, 3 +; nill %r3, 65532 +; lrvr %r2, %r4 +; l %r0, 0(%r3) +; 0: rll %r1, %r0, 16(%r5) ; rnsbg %r1, %r2, 48, 64, 48 ; xilf %r1, 65535 ; rll %r1, %r1, 16(%r5) ; cs %r0, %r1, 0(%r3) ; jglh 0b ; 1: +; rll %r2, %r0, 0(%r5) +; lrvr %r2, %r2 +; br %r14 function %atomic_rmw_nand_i8(i64, i64, i8) -> i8 { block0(v0: i64, v1: i64, v2: i8): @@ -386,17 +387,14 @@ block0(v0: i64, v1: i64, v2: i8): return v3 } -; check: sllk %r2, %r3, 3 -; nextln: nill %r3, 65532 -; nextln: lcr %r5, %r2 -; nextln: l %r0, 0(%r3) -; nextln: 0: rll %r1, %r0, 0(%r2) ; rnsbg %r1, %r4, 32, 40, 24 ; 
xilf %r1, 4278190080 ; rll %r1, %r1, 0(%r5) ; cs %r0, %r1, 0(%r3) ; jglh 0b ; 1: -; nextln: rll %r2, %r0, 8(%r2) -; nextln: br %r14 - -;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; -;; ATOMIC_RMW (SMIN) -;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +; block0: +; sllk %r5, %r3, 3 +; nill %r3, 65532 +; lcr %r2, %r5 +; l %r0, 0(%r3) +; 0: rll %r1, %r0, 0(%r5) ; rnsbg %r1, %r4, 32, 40, 24 ; xilf %r1, 4278190080 ; rll %r1, %r1, 0(%r2) ; cs %r0, %r1, 0(%r3) ; jglh 0b ; 1: +; rll %r2, %r0, 8(%r5) +; br %r14 function %atomic_rmw_smin_i64(i64, i64, i64) -> i64 { block0(v0: i64, v1: i64, v2: i64): @@ -404,10 +402,11 @@ block0(v0: i64, v1: i64, v2: i64): return v3 } -; check: lg %r0, 0(%r3) -; nextln: 0: lrvgr %r1, %r0 ; cgr %r4, %r1 ; jgnl 1f ; lrvgr %r1, %r4 ; csg %r0, %r1, 0(%r3) ; jglh 0b ; 1: -; nextln: lrvgr %r2, %r0 -; nextln: br %r14 +; block0: +; lg %r0, 0(%r3) +; 0: lrvgr %r1, %r0 ; cgr %r4, %r1 ; jgnl 1f ; lrvgr %r1, %r4 ; csg %r0, %r1, 0(%r3) ; jglh 0b ; 1: +; lrvgr %r2, %r0 +; br %r14 function %atomic_rmw_smin_i32(i64, i64, i32) -> i32 { block0(v0: i64, v1: i64, v2: i32): @@ -415,10 +414,11 @@ block0(v0: i64, v1: i64, v2: i32): return v3 } -; check: l %r0, 0(%r3) -; nextln: 0: lrvr %r1, %r0 ; cr %r4, %r1 ; jgnl 1f ; lrvr %r1, %r4 ; cs %r0, %r1, 0(%r3) ; jglh 0b ; 1: -; nextln: lrvr %r2, %r0 -; nextln: br %r14 +; block0: +; l %r0, 0(%r3) +; 0: lrvr %r1, %r0 ; cr %r4, %r1 ; jgnl 1f ; lrvr %r1, %r4 ; cs %r0, %r1, 0(%r3) ; jglh 0b ; 1: +; lrvr %r2, %r0 +; br %r14 function %atomic_rmw_smin_i16(i64, i64, i16) -> i16 { block0(v0: i64, v1: i64, v2: i16): @@ -426,14 +426,15 @@ block0(v0: i64, v1: i64, v2: i16): return v3 } -; check: sllk %r2, %r3, 3 -; nextln: nill %r3, 65532 -; nextln: sllk %r4, %r4, 16 -; nextln: l %r0, 0(%r3) -; nextln: 0: rll %r1, %r0, 16(%r2) ; lrvr %r1, %r1 ; cr %r4, %r1 ; jgnl 1f ; risbgn %r1, %r4, 32, 48, 0 ; lrvr %r1, %r1 ; rll %r1, %r1, 16(%r2) ; cs %r0, %r1, 0(%r3) ; jglh 0b ; 1: -; nextln: rll %r2, %r0, 0(%r2) -; nextln: lrvr %r2, %r2 -; nextln: br %r14 +; block0: +; sllk %r5, %r3, 3 +; nill %r3, 65532 +; sllk %r2, %r4, 16 +; l %r0, 0(%r3) +; 0: rll %r1, %r0, 16(%r5) ; lrvr %r1, %r1 ; cr %r2, %r1 ; jgnl 1f ; risbgn %r1, %r2, 32, 48, 0 ; lrvr %r1, %r1 ; rll %r1, %r1, 16(%r5) ; cs %r0, %r1, 0(%r3) ; jglh 0b ; 1: +; rll %r2, %r0, 0(%r5) +; lrvr %r2, %r2 +; br %r14 function %atomic_rmw_smin_i8(i64, i64, i8) -> i8 { block0(v0: i64, v1: i64, v2: i8): @@ -441,18 +442,15 @@ block0(v0: i64, v1: i64, v2: i8): return v3 } -; check: sllk %r2, %r3, 3 -; nextln: nill %r3, 65532 -; nextln: sllk %r4, %r4, 24 -; nextln: lcr %r5, %r2 -; nextln: l %r0, 0(%r3) -; nextln: 0: rll %r1, %r0, 0(%r2) ; cr %r4, %r1 ; jgnl 1f ; risbgn %r1, %r4, 32, 40, 0 ; rll %r1, %r1, 0(%r5) ; cs %r0, %r1, 0(%r3) ; jglh 0b ; 1: -; nextln: rll %r2, %r0, 8(%r2) -; nextln: br %r14 - -;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; -;; ATOMIC_RMW (SMAX) -;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +; block0: +; sllk %r5, %r3, 3 +; nill %r3, 65532 +; sllk %r2, %r4, 24 +; lcr %r4, %r5 +; l %r0, 0(%r3) +; 0: rll %r1, %r0, 0(%r5) ; cr %r2, %r1 ; jgnl 1f ; risbgn %r1, %r2, 32, 40, 0 ; rll %r1, %r1, 0(%r4) ; cs %r0, %r1, 0(%r3) ; jglh 0b ; 1: +; rll %r2, %r0, 8(%r5) +; br %r14 function %atomic_rmw_smax_i64(i64, i64, i64) -> i64 { block0(v0: i64, v1: i64, v2: i64): @@ -460,10 +458,11 @@ block0(v0: i64, v1: i64, v2: i64): return v3 } -; check: lg %r0, 0(%r3) -; nextln: 0: lrvgr %r1, %r0 ; cgr %r4, %r1 ; jgnh 1f ; lrvgr %r1, %r4 ; csg %r0, %r1, 0(%r3) ; jglh 0b ; 1: -; nextln: lrvgr %r2, %r0 -; nextln: br %r14 +; block0: +; lg 
%r0, 0(%r3) +; 0: lrvgr %r1, %r0 ; cgr %r4, %r1 ; jgnh 1f ; lrvgr %r1, %r4 ; csg %r0, %r1, 0(%r3) ; jglh 0b ; 1: +; lrvgr %r2, %r0 +; br %r14 function %atomic_rmw_smax_i32(i64, i64, i32) -> i32 { block0(v0: i64, v1: i64, v2: i32): @@ -471,10 +470,11 @@ block0(v0: i64, v1: i64, v2: i32): return v3 } -; check: l %r0, 0(%r3) -; nextln: 0: lrvr %r1, %r0 ; cr %r4, %r1 ; jgnh 1f ; lrvr %r1, %r4 ; cs %r0, %r1, 0(%r3) ; jglh 0b ; 1: -; nextln: lrvr %r2, %r0 -; nextln: br %r14 +; block0: +; l %r0, 0(%r3) +; 0: lrvr %r1, %r0 ; cr %r4, %r1 ; jgnh 1f ; lrvr %r1, %r4 ; cs %r0, %r1, 0(%r3) ; jglh 0b ; 1: +; lrvr %r2, %r0 +; br %r14 function %atomic_rmw_smax_i16(i64, i64, i16) -> i16 { block0(v0: i64, v1: i64, v2: i16): @@ -482,14 +482,15 @@ block0(v0: i64, v1: i64, v2: i16): return v3 } -; check: sllk %r2, %r3, 3 -; nextln: nill %r3, 65532 -; nextln: sllk %r4, %r4, 16 -; nextln: l %r0, 0(%r3) -; nextln: 0: rll %r1, %r0, 16(%r2) ; lrvr %r1, %r1 ; cr %r4, %r1 ; jgnh 1f ; risbgn %r1, %r4, 32, 48, 0 ; lrvr %r1, %r1 ; rll %r1, %r1, 16(%r2) ; cs %r0, %r1, 0(%r3) ; jglh 0b ; 1: -; nextln: rll %r2, %r0, 0(%r2) -; nextln: lrvr %r2, %r2 -; nextln: br %r14 +; block0: +; sllk %r5, %r3, 3 +; nill %r3, 65532 +; sllk %r2, %r4, 16 +; l %r0, 0(%r3) +; 0: rll %r1, %r0, 16(%r5) ; lrvr %r1, %r1 ; cr %r2, %r1 ; jgnh 1f ; risbgn %r1, %r2, 32, 48, 0 ; lrvr %r1, %r1 ; rll %r1, %r1, 16(%r5) ; cs %r0, %r1, 0(%r3) ; jglh 0b ; 1: +; rll %r2, %r0, 0(%r5) +; lrvr %r2, %r2 +; br %r14 function %atomic_rmw_smax_i8(i64, i64, i8) -> i8 { block0(v0: i64, v1: i64, v2: i8): @@ -497,18 +498,15 @@ block0(v0: i64, v1: i64, v2: i8): return v3 } -; check: sllk %r2, %r3, 3 -; nextln: nill %r3, 65532 -; nextln: sllk %r4, %r4, 24 -; nextln: lcr %r5, %r2 -; nextln: l %r0, 0(%r3) -; nextln: 0: rll %r1, %r0, 0(%r2) ; cr %r4, %r1 ; jgnh 1f ; risbgn %r1, %r4, 32, 40, 0 ; rll %r1, %r1, 0(%r5) ; cs %r0, %r1, 0(%r3) ; jglh 0b ; 1: -; nextln: rll %r2, %r0, 8(%r2) -; nextln: br %r14 - -;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; -;; ATOMIC_RMW (UMIN) -;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +; block0: +; sllk %r5, %r3, 3 +; nill %r3, 65532 +; sllk %r2, %r4, 24 +; lcr %r4, %r5 +; l %r0, 0(%r3) +; 0: rll %r1, %r0, 0(%r5) ; cr %r2, %r1 ; jgnh 1f ; risbgn %r1, %r2, 32, 40, 0 ; rll %r1, %r1, 0(%r4) ; cs %r0, %r1, 0(%r3) ; jglh 0b ; 1: +; rll %r2, %r0, 8(%r5) +; br %r14 function %atomic_rmw_umin_i64(i64, i64, i64) -> i64 { block0(v0: i64, v1: i64, v2: i64): @@ -516,10 +514,11 @@ block0(v0: i64, v1: i64, v2: i64): return v3 } -; check: lg %r0, 0(%r3) -; nextln: 0: lrvgr %r1, %r0 ; clgr %r4, %r1 ; jgnl 1f ; lrvgr %r1, %r4 ; csg %r0, %r1, 0(%r3) ; jglh 0b ; 1: -; nextln: lrvgr %r2, %r0 -; nextln: br %r14 +; block0: +; lg %r0, 0(%r3) +; 0: lrvgr %r1, %r0 ; clgr %r4, %r1 ; jgnl 1f ; lrvgr %r1, %r4 ; csg %r0, %r1, 0(%r3) ; jglh 0b ; 1: +; lrvgr %r2, %r0 +; br %r14 function %atomic_rmw_umin_i32(i64, i64, i32) -> i32 { block0(v0: i64, v1: i64, v2: i32): @@ -527,10 +526,11 @@ block0(v0: i64, v1: i64, v2: i32): return v3 } -; check: l %r0, 0(%r3) -; nextln: 0: lrvr %r1, %r0 ; clr %r4, %r1 ; jgnl 1f ; lrvr %r1, %r4 ; cs %r0, %r1, 0(%r3) ; jglh 0b ; 1: -; nextln: lrvr %r2, %r0 -; nextln: br %r14 +; block0: +; l %r0, 0(%r3) +; 0: lrvr %r1, %r0 ; clr %r4, %r1 ; jgnl 1f ; lrvr %r1, %r4 ; cs %r0, %r1, 0(%r3) ; jglh 0b ; 1: +; lrvr %r2, %r0 +; br %r14 function %atomic_rmw_umin_i16(i64, i64, i16) -> i16 { block0(v0: i64, v1: i64, v2: i16): @@ -538,14 +538,15 @@ block0(v0: i64, v1: i64, v2: i16): return v3 } -; check: sllk %r2, %r3, 3 -; nextln: nill %r3, 65532 -; nextln: sllk 
%r4, %r4, 16
-; nextln: l %r0, 0(%r3)
-; nextln: 0: rll %r1, %r0, 16(%r2) ; lrvr %r1, %r1 ; clr %r4, %r1 ; jgnl 1f ; risbgn %r1, %r4, 32, 48, 0 ; lrvr %r1, %r1 ; rll %r1, %r1, 16(%r2) ; cs %r0, %r1, 0(%r3) ; jglh 0b ; 1:
-; nextln: rll %r2, %r0, 0(%r2)
-; nextln: lrvr %r2, %r2
-; nextln: br %r14
+; block0:
+; sllk %r5, %r3, 3
+; nill %r3, 65532
+; sllk %r2, %r4, 16
+; l %r0, 0(%r3)
+; 0: rll %r1, %r0, 16(%r5) ; lrvr %r1, %r1 ; clr %r2, %r1 ; jgnl 1f ; risbgn %r1, %r2, 32, 48, 0 ; lrvr %r1, %r1 ; rll %r1, %r1, 16(%r5) ; cs %r0, %r1, 0(%r3) ; jglh 0b ; 1:
+; rll %r2, %r0, 0(%r5)
+; lrvr %r2, %r2
+; br %r14

 function %atomic_rmw_umin_i8(i64, i64, i8) -> i8 {
 block0(v0: i64, v1: i64, v2: i8):
@@ -553,18 +554,15 @@ block0(v0: i64, v1: i64, v2: i8):
 return v3
 }

-; check: sllk %r2, %r3, 3
-; nextln: nill %r3, 65532
-; nextln: sllk %r4, %r4, 24
-; nextln: lcr %r5, %r2
-; nextln: l %r0, 0(%r3)
-; nextln: 0: rll %r1, %r0, 0(%r2) ; clr %r4, %r1 ; jgnl 1f ; risbgn %r1, %r4, 32, 40, 0 ; rll %r1, %r1, 0(%r5) ; cs %r0, %r1, 0(%r3) ; jglh 0b ; 1:
-; nextln: rll %r2, %r0, 8(%r2)
-; nextln: br %r14
-
-;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-;; ATOMIC_RMW (UMAX)
-;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+; block0:
+; sllk %r5, %r3, 3
+; nill %r3, 65532
+; sllk %r2, %r4, 24
+; lcr %r4, %r5
+; l %r0, 0(%r3)
+; 0: rll %r1, %r0, 0(%r5) ; clr %r2, %r1 ; jgnl 1f ; risbgn %r1, %r2, 32, 40, 0 ; rll %r1, %r1, 0(%r4) ; cs %r0, %r1, 0(%r3) ; jglh 0b ; 1:
+; rll %r2, %r0, 8(%r5)
+; br %r14

 function %atomic_rmw_umax_i64(i64, i64, i64) -> i64 {
 block0(v0: i64, v1: i64, v2: i64):
@@ -572,10 +570,11 @@ block0(v0: i64, v1: i64, v2: i64):
 return v3
 }

-; check: lg %r0, 0(%r3)
-; nextln: 0: lrvgr %r1, %r0 ; clgr %r4, %r1 ; jgnh 1f ; lrvgr %r1, %r4 ; csg %r0, %r1, 0(%r3) ; jglh 0b ; 1:
-; nextln: lrvgr %r2, %r0
-; nextln: br %r14
+; block0:
+; lg %r0, 0(%r3)
+; 0: lrvgr %r1, %r0 ; clgr %r4, %r1 ; jgnh 1f ; lrvgr %r1, %r4 ; csg %r0, %r1, 0(%r3) ; jglh 0b ; 1:
+; lrvgr %r2, %r0
+; br %r14

 function %atomic_rmw_umax_i32(i64, i64, i32) -> i32 {
 block0(v0: i64, v1: i64, v2: i32):
@@ -583,10 +582,11 @@ block0(v0: i64, v1: i64, v2: i32):
 return v3
 }

-; check: l %r0, 0(%r3)
-; nextln: 0: lrvr %r1, %r0 ; clr %r4, %r1 ; jgnh 1f ; lrvr %r1, %r4 ; cs %r0, %r1, 0(%r3) ; jglh 0b ; 1:
-; nextln: lrvr %r2, %r0
-; nextln: br %r14
+; block0:
+; l %r0, 0(%r3)
+; 0: lrvr %r1, %r0 ; clr %r4, %r1 ; jgnh 1f ; lrvr %r1, %r4 ; cs %r0, %r1, 0(%r3) ; jglh 0b ; 1:
+; lrvr %r2, %r0
+; br %r14

 function %atomic_rmw_umax_i16(i64, i64, i16) -> i16 {
 block0(v0: i64, v1: i64, v2: i16):
@@ -594,14 +594,15 @@ block0(v0: i64, v1: i64, v2: i16):
 return v3
 }

-; check: sllk %r2, %r3, 3
-; nextln: nill %r3, 65532
-; nextln: sllk %r4, %r4, 16
-; nextln: l %r0, 0(%r3)
-; nextln: 0: rll %r1, %r0, 16(%r2) ; lrvr %r1, %r1 ; clr %r4, %r1 ; jgnh 1f ; risbgn %r1, %r4, 32, 48, 0 ; lrvr %r1, %r1 ; rll %r1, %r1, 16(%r2) ; cs %r0, %r1, 0(%r3) ; jglh 0b ; 1:
-; nextln: rll %r2, %r0, 0(%r2)
-; nextln: lrvr %r2, %r2
-; nextln: br %r14
+; block0:
+; sllk %r5, %r3, 3
+; nill %r3, 65532
+; sllk %r2, %r4, 16
+; l %r0, 0(%r3)
+; 0: rll %r1, %r0, 16(%r5) ; lrvr %r1, %r1 ; clr %r2, %r1 ; jgnh 1f ; risbgn %r1, %r2, 32, 48, 0 ; lrvr %r1, %r1 ; rll %r1, %r1, 16(%r5) ; cs %r0, %r1, 0(%r3) ; jglh 0b ; 1:
+; rll %r2, %r0, 0(%r5)
+; lrvr %r2, %r2
+; br %r14

 function %atomic_rmw_umax_i8(i64, i64, i8) -> i8 {
 block0(v0: i64, v1: i64, v2: i8):
@@ -609,12 +610,13 @@ block0(v0: i64, v1: i64, v2: i8):
 return v3
 }

-; check: sllk %r2, %r3, 3
-; nextln: nill %r3, 65532
-; nextln: sllk %r4, %r4, 24
-; nextln: lcr %r5, %r2
-; nextln: l %r0, 0(%r3)
-; nextln: 0: rll %r1, %r0, 0(%r2) ; clr %r4, %r1 ; jgnh 1f ; risbgn %r1, %r4, 32, 40, 0 ; rll %r1, %r1, 0(%r5) ; cs %r0, %r1, 0(%r3) ; jglh 0b ; 1:
-; nextln: rll %r2, %r0, 8(%r2)
-; nextln: br %r14
+; block0:
+; sllk %r5, %r3, 3
+; nill %r3, 65532
+; sllk %r2, %r4, 24
+; lcr %r4, %r5
+; l %r0, 0(%r3)
+; 0: rll %r1, %r0, 0(%r5) ; clr %r2, %r1 ; jgnh 1f ; risbgn %r1, %r2, 32, 40, 0 ; rll %r1, %r1, 0(%r4) ; cs %r0, %r1, 0(%r3) ; jglh 0b ; 1:
+; rll %r2, %r0, 8(%r5)
+; br %r14
diff --git a/cranelift/filetests/filetests/isa/s390x/atomic_rmw.clif b/cranelift/filetests/filetests/isa/s390x/atomic_rmw.clif
index 13b3adb1cd..648845f374 100644
--- a/cranelift/filetests/filetests/isa/s390x/atomic_rmw.clif
+++ b/cranelift/filetests/filetests/isa/s390x/atomic_rmw.clif
@@ -1,4 +1,4 @@
-test compile
+test compile precise-output
 target s390x

 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
@@ -11,10 +11,11 @@ block0(v0: i64, v1: i64, v2: i64):
 return v3
 }

-; check: lg %r0, 0(%r3)
-; nextln: 0: csg %r0, %r4, 0(%r3) ; jglh 0b ; 1:
-; nextln: lgr %r2, %r0
-; nextln: br %r14
+; block0:
+; lg %r0, 0(%r3)
+; 0: csg %r0, %r4, 0(%r3) ; jglh 0b ; 1:
+; lgr %r2, %r0
+; br %r14

 function %atomic_rmw_xchg_i32(i64, i64, i32) -> i32 {
 block0(v0: i64, v1: i64, v2: i32):
@@ -22,10 +23,11 @@ block0(v0: i64, v1: i64, v2: i32):
 return v3
 }

-; check: l %r0, 0(%r3)
-; nextln: 0: cs %r0, %r4, 0(%r3) ; jglh 0b ; 1:
-; nextln: lr %r2, %r0
-; nextln: br %r14
+; block0:
+; l %r0, 0(%r3)
+; 0: cs %r0, %r4, 0(%r3) ; jglh 0b ; 1:
+; lgr %r2, %r0
+; br %r14

 function %atomic_rmw_xchg_i16(i64, i64, i16) -> i16 {
 block0(v0: i64, v1: i64, v2: i16):
@@ -33,12 +35,13 @@ block0(v0: i64, v1: i64, v2: i16):
 return v3
 }

-; check: sllk %r2, %r3, 3
-; nextln: nill %r3, 65532
-; nextln: l %r0, 0(%r3)
-; nextln: 0: rll %r1, %r0, 0(%r2) ; risbgn %r1, %r4, 32, 48, 16 ; rll %r1, %r1, 0(%r2) ; cs %r0, %r1, 0(%r3) ; jglh 0b ; 1:
-; nextln: rll %r2, %r0, 16(%r2)
-; nextln: br %r14
+; block0:
+; sllk %r5, %r3, 3
+; nill %r3, 65532
+; l %r0, 0(%r3)
+; 0: rll %r1, %r0, 0(%r5) ; risbgn %r1, %r4, 32, 48, 16 ; rll %r1, %r1, 0(%r5) ; cs %r0, %r1, 0(%r3) ; jglh 0b ; 1:
+; rll %r2, %r0, 16(%r5)
+; br %r14

 function %atomic_rmw_xchg_i8(i64, i64, i8) -> i8 {
 block0(v0: i64, v1: i64, v2: i8):
@@ -46,17 +49,14 @@ block0(v0: i64, v1: i64, v2: i8):
 return v3
 }

-; check: sllk %r2, %r3, 3
-; nextln: nill %r3, 65532
-; nextln: lcr %r5, %r2
-; nextln: l %r0, 0(%r3)
-; nextln: 0: rll %r1, %r0, 0(%r2) ; risbgn %r1, %r4, 32, 40, 24 ; rll %r1, %r1, 0(%r5) ; cs %r0, %r1, 0(%r3) ; jglh 0b ; 1:
-; nextln: rll %r2, %r0, 8(%r2)
-; nextln: br %r14
-
-;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-;; ATOMIC_RMW (ADD)
-;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+; block0:
+; sllk %r5, %r3, 3
+; nill %r3, 65532
+; lcr %r2, %r5
+; l %r0, 0(%r3)
+; 0: rll %r1, %r0, 0(%r5) ; risbgn %r1, %r4, 32, 40, 24 ; rll %r1, %r1, 0(%r2) ; cs %r0, %r1, 0(%r3) ; jglh 0b ; 1:
+; rll %r2, %r0, 8(%r5)
+; br %r14

 function %atomic_rmw_add_i64(i64, i64) -> i64 {
 block0(v0: i64, v1: i64):
@@ -64,8 +64,9 @@ block0(v0: i64, v1: i64):
 return v2
 }

-; check: laag %r2, %r3, 0(%r2)
-; nextln: br %r14
+; block0:
+; laag %r2, %r3, 0(%r2)
+; br %r14

 function %atomic_rmw_add_i32(i64, i32) -> i32 {
 block0(v0: i64, v1: i32):
@@ -73,8 +74,9 @@ block0(v0: i64, v1: i32):
 return v2
 }

-; check: laa %r2, %r3, 0(%r2)
-; nextln: br %r14
+; block0:
+; laa %r2, %r3, 0(%r2)
+; br %r14

 function %atomic_rmw_add_i16(i64, i64, i16) -> i16 {
 block0(v0: i64, v1: i64, v2: i16):
@@ -82,13 +84,14 @@ block0(v0: i64, v1: i64, v2: i16):
 return v3
 }

-; check: sllk %r2, %r3, 3
-; nextln: nill %r3, 65532
-; nextln: sllk %r4, %r4, 16
-; nextln: l %r0, 0(%r3)
-; nextln: 0: rll %r1, %r0, 0(%r2) ; ar %r1, %r4 ; rll %r1, %r1, 0(%r2) ; cs %r0, %r1, 0(%r3) ; jglh 0b ; 1:
-; nextln: rll %r2, %r0, 16(%r2)
-; nextln: br %r14
+; block0:
+; sllk %r5, %r3, 3
+; nill %r3, 65532
+; sllk %r2, %r4, 16
+; l %r0, 0(%r3)
+; 0: rll %r1, %r0, 0(%r5) ; ar %r1, %r2 ; rll %r1, %r1, 0(%r5) ; cs %r0, %r1, 0(%r3) ; jglh 0b ; 1:
+; rll %r2, %r0, 16(%r5)
+; br %r14

 function %atomic_rmw_add_i8(i64, i64, i8) -> i8 {
 block0(v0: i64, v1: i64, v2: i8):
@@ -96,18 +99,15 @@ block0(v0: i64, v1: i64, v2: i8):
 return v3
 }

-; check: sllk %r2, %r3, 3
-; nextln: nill %r3, 65532
-; nextln: sllk %r4, %r4, 24
-; nextln: lcr %r5, %r2
-; nextln: l %r0, 0(%r3)
-; nextln: 0: rll %r1, %r0, 0(%r2) ; ar %r1, %r4 ; rll %r1, %r1, 0(%r5) ; cs %r0, %r1, 0(%r3) ; jglh 0b ; 1:
-; nextln: rll %r2, %r0, 8(%r2)
-; nextln: br %r14
-
-;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-;; ATOMIC_RMW (SUB)
-;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+; block0:
+; sllk %r5, %r3, 3
+; nill %r3, 65532
+; sllk %r2, %r4, 24
+; lcr %r4, %r5
+; l %r0, 0(%r3)
+; 0: rll %r1, %r0, 0(%r5) ; ar %r1, %r2 ; rll %r1, %r1, 0(%r4) ; cs %r0, %r1, 0(%r3) ; jglh 0b ; 1:
+; rll %r2, %r0, 8(%r5)
+; br %r14

 function %atomic_rmw_sub_i64(i64, i64) -> i64 {
 block0(v0: i64, v1: i64):
@@ -115,9 +115,10 @@ block0(v0: i64, v1: i64):
 return v2
 }

-; check: lcgr %r3, %r3
-; nextln: laag %r2, %r3, 0(%r2)
-; nextln: br %r14
+; block0:
+; lcgr %r3, %r3
+; laag %r2, %r3, 0(%r2)
+; br %r14

 function %atomic_rmw_sub_i32(i64, i32) -> i32 {
 block0(v0: i64, v1: i32):
@@ -125,9 +126,10 @@ block0(v0: i64, v1: i32):
 return v2
 }

-; check: lcr %r3, %r3
-; nextln: laa %r2, %r3, 0(%r2)
-; nextln: br %r14
+; block0:
+; lcr %r3, %r3
+; laa %r2, %r3, 0(%r2)
+; br %r14

 function %atomic_rmw_sub_i16(i64, i64, i16) -> i16 {
 block0(v0: i64, v1: i64, v2: i16):
@@ -135,13 +137,14 @@ block0(v0: i64, v1: i64, v2: i16):
 return v3
 }

-; check: sllk %r2, %r3, 3
-; nextln: nill %r3, 65532
-; nextln: sllk %r4, %r4, 16
-; nextln: l %r0, 0(%r3)
-; nextln: 0: rll %r1, %r0, 0(%r2) ; sr %r1, %r4 ; rll %r1, %r1, 0(%r2) ; cs %r0, %r1, 0(%r3) ; jglh 0b ; 1:
-; nextln: rll %r2, %r0, 16(%r2)
-; nextln: br %r14
+; block0:
+; sllk %r5, %r3, 3
+; nill %r3, 65532
+; sllk %r2, %r4, 16
+; l %r0, 0(%r3)
+; 0: rll %r1, %r0, 0(%r5) ; sr %r1, %r2 ; rll %r1, %r1, 0(%r5) ; cs %r0, %r1, 0(%r3) ; jglh 0b ; 1:
+; rll %r2, %r0, 16(%r5)
+; br %r14

 function %atomic_rmw_sub_i8(i64, i64, i8) -> i8 {
 block0(v0: i64, v1: i64, v2: i8):
@@ -149,18 +152,15 @@ block0(v0: i64, v1: i64, v2: i8):
 return v3
 }

-; check: sllk %r2, %r3, 3
-; nextln: nill %r3, 65532
-; nextln: sllk %r4, %r4, 24
-; nextln: lcr %r5, %r2
-; nextln: l %r0, 0(%r3)
-; nextln: 0: rll %r1, %r0, 0(%r2) ; sr %r1, %r4 ; rll %r1, %r1, 0(%r5) ; cs %r0, %r1, 0(%r3) ; jglh 0b ; 1:
-; nextln: rll %r2, %r0, 8(%r2)
-; nextln: br %r14
-
-;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-;; ATOMIC_RMW (AND)
-;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+; block0:
+; sllk %r5, %r3, 3
+; nill %r3, 65532
+; sllk %r2, %r4, 24
+; lcr %r4, %r5
+; l %r0, 0(%r3)
+; 0: rll %r1, %r0, 0(%r5) ; sr %r1, %r2 ; rll %r1, %r1, 0(%r4) ; cs %r0, %r1, 0(%r3) ; jglh 0b ; 1:
+; rll %r2, %r0, 8(%r5)
+; br %r14

 function %atomic_rmw_and_i64(i64, i64) -> i64 {
 block0(v0: i64, v1: i64):
@@ -168,8 +168,9 @@ block0(v0: i64, v1: i64):
 return v2
 }

-; check: lang %r2, %r3, 0(%r2)
-; nextln: br %r14
+; block0:
+; lang %r2, %r3, 0(%r2)
+; br %r14

 function %atomic_rmw_and_i32(i64, i32) -> i32 {
 block0(v0: i64, v1: i32):
@@ -177,8 +178,9 @@ block0(v0: i64, v1: i32):
 return v2
 }

-; check: lan %r2, %r3, 0(%r2)
-; nextln: br %r14
+; block0:
+; lan %r2, %r3, 0(%r2)
+; br %r14

 function %atomic_rmw_and_i16(i64, i64, i16) -> i16 {
 block0(v0: i64, v1: i64, v2: i16):
@@ -186,12 +188,13 @@ block0(v0: i64, v1: i64, v2: i16):
 return v3
 }

-; check: sllk %r2, %r3, 3
-; nextln: nill %r3, 65532
-; nextln: l %r0, 0(%r3)
-; nextln: 0: rll %r1, %r0, 0(%r2) ; rnsbg %r1, %r4, 32, 48, 16 ; rll %r1, %r1, 0(%r2) ; cs %r0, %r1, 0(%r3) ; jglh 0b ; 1:
-; nextln: rll %r2, %r0, 16(%r2)
-; nextln: br %r14
+; block0:
+; sllk %r5, %r3, 3
+; nill %r3, 65532
+; l %r0, 0(%r3)
+; 0: rll %r1, %r0, 0(%r5) ; rnsbg %r1, %r4, 32, 48, 16 ; rll %r1, %r1, 0(%r5) ; cs %r0, %r1, 0(%r3) ; jglh 0b ; 1:
+; rll %r2, %r0, 16(%r5)
+; br %r14

 function %atomic_rmw_and_i8(i64, i64, i8) -> i8 {
 block0(v0: i64, v1: i64, v2: i8):
@@ -199,17 +202,14 @@ block0(v0: i64, v1: i64, v2: i8):
 return v3
 }

-; check: sllk %r2, %r3, 3
-; nextln: nill %r3, 65532
-; nextln: lcr %r5, %r2
-; nextln: l %r0, 0(%r3)
-; nextln: 0: rll %r1, %r0, 0(%r2) ; rnsbg %r1, %r4, 32, 40, 24 ; rll %r1, %r1, 0(%r5) ; cs %r0, %r1, 0(%r3) ; jglh 0b ; 1:
-; nextln: rll %r2, %r0, 8(%r2)
-; nextln: br %r14
-
-;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-;; ATOMIC_RMW (OR)
-;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+; block0:
+; sllk %r5, %r3, 3
+; nill %r3, 65532
+; lcr %r2, %r5
+; l %r0, 0(%r3)
+; 0: rll %r1, %r0, 0(%r5) ; rnsbg %r1, %r4, 32, 40, 24 ; rll %r1, %r1, 0(%r2) ; cs %r0, %r1, 0(%r3) ; jglh 0b ; 1:
+; rll %r2, %r0, 8(%r5)
+; br %r14

 function %atomic_rmw_or_i64(i64, i64) -> i64 {
 block0(v0: i64, v1: i64):
@@ -217,8 +217,9 @@ block0(v0: i64, v1: i64):
 return v2
 }

-; check: laog %r2, %r3, 0(%r2)
-; nextln: br %r14
+; block0:
+; laog %r2, %r3, 0(%r2)
+; br %r14

 function %atomic_rmw_or_i32(i64, i32) -> i32 {
 block0(v0: i64, v1: i32):
@@ -226,8 +227,9 @@ block0(v0: i64, v1: i32):
 return v2
 }

-; check: lao %r2, %r3, 0(%r2)
-; nextln: br %r14
+; block0:
+; lao %r2, %r3, 0(%r2)
+; br %r14

 function %atomic_rmw_or_i16(i64, i64, i16) -> i16 {
 block0(v0: i64, v1: i64, v2: i16):
@@ -235,12 +237,13 @@ block0(v0: i64, v1: i64, v2: i16):
 return v3
 }

-; check: sllk %r2, %r3, 3
-; nextln: nill %r3, 65532
-; nextln: l %r0, 0(%r3)
-; nextln: 0: rll %r1, %r0, 0(%r2) ; rosbg %r1, %r4, 32, 48, 16 ; rll %r1, %r1, 0(%r2) ; cs %r0, %r1, 0(%r3) ; jglh 0b ; 1:
-; nextln: rll %r2, %r0, 16(%r2)
-; nextln: br %r14
+; block0:
+; sllk %r5, %r3, 3
+; nill %r3, 65532
+; l %r0, 0(%r3)
+; 0: rll %r1, %r0, 0(%r5) ; rosbg %r1, %r4, 32, 48, 16 ; rll %r1, %r1, 0(%r5) ; cs %r0, %r1, 0(%r3) ; jglh 0b ; 1:
+; rll %r2, %r0, 16(%r5)
+; br %r14

 function %atomic_rmw_or_i8(i64, i64, i8) -> i8 {
 block0(v0: i64, v1: i64, v2: i8):
@@ -248,17 +251,14 @@ block0(v0: i64, v1: i64, v2: i8):
 return v3
 }

-; check: sllk %r2, %r3, 3
-; nextln: nill %r3, 65532
-; nextln: lcr %r5, %r2
-; nextln: l %r0, 0(%r3)
-; nextln: 0: rll %r1, %r0, 0(%r2) ; rosbg %r1, %r4, 32, 40, 24 ; rll %r1, %r1, 0(%r5) ; cs %r0, %r1, 0(%r3) ; jglh 0b ; 1:
-; nextln: rll %r2, %r0, 8(%r2)
-; nextln: br %r14
-
-;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-;; ATOMIC_RMW (XOR)
-;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+; block0:
+; sllk %r5, %r3, 3
+; nill %r3, 65532
+; lcr %r2, %r5
+; l %r0, 0(%r3)
+; 0: rll %r1, %r0, 0(%r5) ; rosbg %r1, %r4, 32, 40, 24 ; rll %r1, %r1, 0(%r2) ; cs %r0, %r1, 0(%r3) ; jglh 0b ; 1:
+; rll %r2, %r0, 8(%r5)
+; br %r14

 function %atomic_rmw_xor_i64(i64, i64) -> i64 {
 block0(v0: i64, v1: i64):
@@ -266,8 +266,9 @@ block0(v0: i64, v1: i64):
 return v2
 }

-; check: laxg %r2, %r3, 0(%r2)
-; nextln: br %r14
+; block0:
+; laxg %r2, %r3, 0(%r2)
+; br %r14

 function %atomic_rmw_xor_i32(i64, i32) -> i32 {
 block0(v0: i64, v1: i32):
@@ -275,8 +276,9 @@ block0(v0: i64, v1: i32):
 return v2
 }

-; check: lax %r2, %r3, 0(%r2)
-; nextln: br %r14
+; block0:
+; lax %r2, %r3, 0(%r2)
+; br %r14

 function %atomic_rmw_xor_i16(i64, i64, i16) -> i16 {
 block0(v0: i64, v1: i64, v2: i16):
@@ -284,12 +286,13 @@ block0(v0: i64, v1: i64, v2: i16):
 return v3
 }

-; check: sllk %r2, %r3, 3
-; nextln: nill %r3, 65532
-; nextln: l %r0, 0(%r3)
-; nextln: 0: rll %r1, %r0, 0(%r2) ; rxsbg %r1, %r4, 32, 48, 16 ; rll %r1, %r1, 0(%r2) ; cs %r0, %r1, 0(%r3) ; jglh 0b ; 1:
-; nextln: rll %r2, %r0, 16(%r2)
-; nextln: br %r14
+; block0:
+; sllk %r5, %r3, 3
+; nill %r3, 65532
+; l %r0, 0(%r3)
+; 0: rll %r1, %r0, 0(%r5) ; rxsbg %r1, %r4, 32, 48, 16 ; rll %r1, %r1, 0(%r5) ; cs %r0, %r1, 0(%r3) ; jglh 0b ; 1:
+; rll %r2, %r0, 16(%r5)
+; br %r14

 function %atomic_rmw_xor_i8(i64, i64, i8) -> i8 {
 block0(v0: i64, v1: i64, v2: i8):
@@ -297,17 +300,14 @@ block0(v0: i64, v1: i64, v2: i8):
 return v3
 }

-; check: sllk %r2, %r3, 3
-; nextln: nill %r3, 65532
-; nextln: lcr %r5, %r2
-; nextln: l %r0, 0(%r3)
-; nextln: 0: rll %r1, %r0, 0(%r2) ; rxsbg %r1, %r4, 32, 40, 24 ; rll %r1, %r1, 0(%r5) ; cs %r0, %r1, 0(%r3) ; jglh 0b ; 1:
-; nextln: rll %r2, %r0, 8(%r2)
-; nextln: br %r14
-
-;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-;; ATOMIC_RMW (NAND)
-;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+; block0:
+; sllk %r5, %r3, 3
+; nill %r3, 65532
+; lcr %r2, %r5
+; l %r0, 0(%r3)
+; 0: rll %r1, %r0, 0(%r5) ; rxsbg %r1, %r4, 32, 40, 24 ; rll %r1, %r1, 0(%r2) ; cs %r0, %r1, 0(%r3) ; jglh 0b ; 1:
+; rll %r2, %r0, 8(%r5)
+; br %r14

 function %atomic_rmw_nand_i64(i64, i64, i64) -> i64 {
 block0(v0: i64, v1: i64, v2: i64):
@@ -315,10 +315,11 @@ block0(v0: i64, v1: i64, v2: i64):
 return v3
 }

-; check: lg %r0, 0(%r3)
-; nextln: 0: ngrk %r1, %r0, %r4 ; xilf %r1, 4294967295 ; xihf %r1, 4294967295 ; csg %r0, %r1, 0(%r3) ; jglh 0b ; 1:
-; nextln: lgr %r2, %r0
-; nextln: br %r14
+; block0:
+; lg %r0, 0(%r3)
+; 0: ngrk %r1, %r0, %r4 ; xilf %r1, 4294967295 ; xihf %r1, 4294967295 ; csg %r0, %r1, 0(%r3) ; jglh 0b ; 1:
+; lgr %r2, %r0
+; br %r14

 function %atomic_rmw_nand_i32(i64, i64, i32) -> i32 {
 block0(v0: i64, v1: i64, v2: i32):
@@ -326,10 +327,11 @@ block0(v0: i64, v1: i64, v2: i32):
 return v3
 }

-; check: l %r0, 0(%r3)
-; nextln: 0: nrk %r1, %r0, %r4 ; xilf %r1, 4294967295 ; cs %r0, %r1, 0(%r3) ; jglh 0b ; 1:
-; nextln: lr %r2, %r0
-; nextln: br %r14
+; block0:
+; l %r0, 0(%r3)
+; 0: nrk %r1, %r0, %r4 ; xilf %r1, 4294967295 ; cs %r0, %r1, 0(%r3) ; jglh 0b ; 1:
+; lgr %r2, %r0
+; br %r14

 function %atomic_rmw_nand_i16(i64, i64, i16) -> i16 {
 block0(v0: i64, v1: i64, v2: i16):
@@ -337,12 +339,13 @@ block0(v0: i64, v1: i64, v2: i16):
 return v3
 }

-; check: sllk %r2, %r3, 3
-; nextln: nill %r3, 65532
-; nextln: l %r0, 0(%r3)
-; nextln: 0: rll %r1, %r0, 0(%r2) ; rnsbg %r1, %r4, 32, 48, 16 ; xilf %r1, 4294901760 ; rll %r1, %r1, 0(%r2) ; cs %r0, %r1, 0(%r3) ; jglh 0b ; 1:
-; nextln: rll %r2, %r0, 16(%r2)
-; nextln: br %r14
+; block0:
+; sllk %r5, %r3, 3
+; nill %r3, 65532
+; l %r0, 0(%r3)
+; 0: rll %r1, %r0, 0(%r5) ; rnsbg %r1, %r4, 32, 48, 16 ; xilf %r1, 4294901760 ; rll %r1, %r1, 0(%r5) ; cs %r0, %r1, 0(%r3) ; jglh 0b ; 1:
+; rll %r2, %r0, 16(%r5)
+; br %r14

 function %atomic_rmw_nand_i8(i64, i64, i8) -> i8 {
 block0(v0: i64, v1: i64, v2: i8):
@@ -350,17 +353,14 @@ block0(v0: i64, v1: i64, v2: i8):
 return v3
 }

-; check: sllk %r2, %r3, 3
-; nextln: nill %r3, 65532
-; nextln: lcr %r5, %r2
-; nextln: l %r0, 0(%r3)
-; nextln: 0: rll %r1, %r0, 0(%r2) ; rnsbg %r1, %r4, 32, 40, 24 ; xilf %r1, 4278190080 ; rll %r1, %r1, 0(%r5) ; cs %r0, %r1, 0(%r3) ; jglh 0b ; 1:
-; nextln: rll %r2, %r0, 8(%r2)
-; nextln: br %r14
-
-;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-;; ATOMIC_RMW (SMIN)
-;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+; block0:
+; sllk %r5, %r3, 3
+; nill %r3, 65532
+; lcr %r2, %r5
+; l %r0, 0(%r3)
+; 0: rll %r1, %r0, 0(%r5) ; rnsbg %r1, %r4, 32, 40, 24 ; xilf %r1, 4278190080 ; rll %r1, %r1, 0(%r2) ; cs %r0, %r1, 0(%r3) ; jglh 0b ; 1:
+; rll %r2, %r0, 8(%r5)
+; br %r14

 function %atomic_rmw_smin_i64(i64, i64, i64) -> i64 {
 block0(v0: i64, v1: i64, v2: i64):
@@ -368,10 +368,11 @@ block0(v0: i64, v1: i64, v2: i64):
 return v3
 }

-; check: lg %r0, 0(%r3)
-; nextln: 0: cgr %r4, %r0 ; jgnl 1f ; csg %r0, %r4, 0(%r3) ; jglh 0b ; 1:
-; nextln: lgr %r2, %r0
-; nextln: br %r14
+; block0:
+; lg %r0, 0(%r3)
+; 0: cgr %r4, %r0 ; jgnl 1f ; csg %r0, %r4, 0(%r3) ; jglh 0b ; 1:
+; lgr %r2, %r0
+; br %r14

 function %atomic_rmw_smin_i32(i64, i64, i32) -> i32 {
 block0(v0: i64, v1: i64, v2: i32):
@@ -379,10 +380,11 @@ block0(v0: i64, v1: i64, v2: i32):
 return v3
 }

-; check: l %r0, 0(%r3)
-; nextln: 0: cr %r4, %r0 ; jgnl 1f ; cs %r0, %r4, 0(%r3) ; jglh 0b ; 1:
-; nextln: lr %r2, %r0
-; nextln: br %r14
+; block0:
+; l %r0, 0(%r3)
+; 0: cr %r4, %r0 ; jgnl 1f ; cs %r0, %r4, 0(%r3) ; jglh 0b ; 1:
+; lgr %r2, %r0
+; br %r14

 function %atomic_rmw_smin_i16(i64, i64, i16) -> i16 {
 block0(v0: i64, v1: i64, v2: i16):
@@ -390,13 +392,14 @@ block0(v0: i64, v1: i64, v2: i16):
 return v3
 }

-; check: sllk %r2, %r3, 3
-; nextln: nill %r3, 65532
-; nextln: sllk %r4, %r4, 16
-; nextln: l %r0, 0(%r3)
-; nextln: 0: rll %r1, %r0, 0(%r2) ; cr %r4, %r1 ; jgnl 1f ; risbgn %r1, %r4, 32, 48, 0 ; rll %r1, %r1, 0(%r2) ; cs %r0, %r1, 0(%r3) ; jglh 0b ; 1:
-; nextln: rll %r2, %r0, 16(%r2)
-; nextln: br %r14
+; block0:
+; sllk %r5, %r3, 3
+; nill %r3, 65532
+; sllk %r2, %r4, 16
+; l %r0, 0(%r3)
+; 0: rll %r1, %r0, 0(%r5) ; cr %r2, %r1 ; jgnl 1f ; risbgn %r1, %r2, 32, 48, 0 ; rll %r1, %r1, 0(%r5) ; cs %r0, %r1, 0(%r3) ; jglh 0b ; 1:
+; rll %r2, %r0, 16(%r5)
+; br %r14

 function %atomic_rmw_smin_i8(i64, i64, i8) -> i8 {
 block0(v0: i64, v1: i64, v2: i8):
@@ -404,18 +407,15 @@ block0(v0: i64, v1: i64, v2: i8):
 return v3
 }

-; check: sllk %r2, %r3, 3
-; nextln: nill %r3, 65532
-; nextln: sllk %r4, %r4, 24
-; nextln: lcr %r5, %r2
-; nextln: l %r0, 0(%r3)
-; nextln: 0: rll %r1, %r0, 0(%r2) ; cr %r4, %r1 ; jgnl 1f ; risbgn %r1, %r4, 32, 40, 0 ; rll %r1, %r1, 0(%r5) ; cs %r0, %r1, 0(%r3) ; jglh 0b ; 1:
-; nextln: rll %r2, %r0, 8(%r2)
-; nextln: br %r14
-
-;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-;; ATOMIC_RMW (SMAX)
-;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+; block0:
+; sllk %r5, %r3, 3
+; nill %r3, 65532
+; sllk %r2, %r4, 24
+; lcr %r4, %r5
+; l %r0, 0(%r3)
+; 0: rll %r1, %r0, 0(%r5) ; cr %r2, %r1 ; jgnl 1f ; risbgn %r1, %r2, 32, 40, 0 ; rll %r1, %r1, 0(%r4) ; cs %r0, %r1, 0(%r3) ; jglh 0b ; 1:
+; rll %r2, %r0, 8(%r5)
+; br %r14

 function %atomic_rmw_smax_i64(i64, i64, i64) -> i64 {
 block0(v0: i64, v1: i64, v2: i64):
@@ -423,10 +423,11 @@ block0(v0: i64, v1: i64, v2: i64):
 return v3
 }

-; check: lg %r0, 0(%r3)
-; nextln: 0: cgr %r4, %r0 ; jgnh 1f ; csg %r0, %r4, 0(%r3) ; jglh 0b ; 1:
-; nextln: lgr %r2, %r0
-; nextln: br %r14
+; block0:
+; lg %r0, 0(%r3)
+; 0: cgr %r4, %r0 ; jgnh 1f ; csg %r0, %r4, 0(%r3) ; jglh 0b ; 1:
+; lgr %r2, %r0
+; br %r14

 function %atomic_rmw_smax_i32(i64, i64, i32) -> i32 {
 block0(v0: i64, v1: i64, v2: i32):
@@ -434,10 +435,11 @@ block0(v0: i64, v1: i64, v2: i32):
 return v3
 }

-; check: l %r0, 0(%r3)
-; nextln: 0: cr %r4, %r0 ; jgnh 1f ; cs %r0, %r4, 0(%r3) ; jglh 0b ; 1:
-; nextln: lr %r2, %r0
-; nextln: br %r14
+; block0:
+; l %r0, 0(%r3)
+; 0: cr %r4, %r0 ; jgnh 1f ; cs %r0, %r4, 0(%r3) ; jglh 0b ; 1:
+; lgr %r2, %r0
+; br %r14

 function %atomic_rmw_smax_i16(i64, i64, i16) -> i16 {
 block0(v0: i64, v1: i64, v2: i16):
@@ -445,13 +447,14 @@ block0(v0: i64, v1: i64, v2: i16):
 return v3
 }

-; check: sllk %r2, %r3, 3
-; nextln: nill %r3, 65532
-; nextln: sllk %r4, %r4, 16
-; nextln: l %r0, 0(%r3)
-; nextln: 0: rll %r1, %r0, 0(%r2) ; cr %r4, %r1 ; jgnh 1f ; risbgn %r1, %r4, 32, 48, 0 ; rll %r1, %r1, 0(%r2) ; cs %r0, %r1, 0(%r3) ; jglh 0b ; 1:
-; nextln: rll %r2, %r0, 16(%r2)
-; nextln: br %r14
+; block0:
+; sllk %r5, %r3, 3
+; nill %r3, 65532
+; sllk %r2, %r4, 16
+; l %r0, 0(%r3)
+; 0: rll %r1, %r0, 0(%r5) ; cr %r2, %r1 ; jgnh 1f ; risbgn %r1, %r2, 32, 48, 0 ; rll %r1, %r1, 0(%r5) ; cs %r0, %r1, 0(%r3) ; jglh 0b ; 1:
+; rll %r2, %r0, 16(%r5)
+; br %r14

 function %atomic_rmw_smax_i8(i64, i64, i8) -> i8 {
 block0(v0: i64, v1: i64, v2: i8):
@@ -459,18 +462,15 @@ block0(v0: i64, v1: i64, v2: i8):
 return v3
 }

-; check: sllk %r2, %r3, 3
-; nextln: nill %r3, 65532
-; nextln: sllk %r4, %r4, 24
-; nextln: lcr %r5, %r2
-; nextln: l %r0, 0(%r3)
-; nextln: 0: rll %r1, %r0, 0(%r2) ; cr %r4, %r1 ; jgnh 1f ; risbgn %r1, %r4, 32, 40, 0 ; rll %r1, %r1, 0(%r5) ; cs %r0, %r1, 0(%r3) ; jglh 0b ; 1:
-; nextln: rll %r2, %r0, 8(%r2)
-; nextln: br %r14
-
-;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-;; ATOMIC_RMW (UMIN)
-;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+; block0:
+; sllk %r5, %r3, 3
+; nill %r3, 65532
+; sllk %r2, %r4, 24
+; lcr %r4, %r5
+; l %r0, 0(%r3)
+; 0: rll %r1, %r0, 0(%r5) ; cr %r2, %r1 ; jgnh 1f ; risbgn %r1, %r2, 32, 40, 0 ; rll %r1, %r1, 0(%r4) ; cs %r0, %r1, 0(%r3) ; jglh 0b ; 1:
+; rll %r2, %r0, 8(%r5)
+; br %r14

 function %atomic_rmw_umin_i64(i64, i64, i64) -> i64 {
 block0(v0: i64, v1: i64, v2: i64):
@@ -478,10 +478,11 @@ block0(v0: i64, v1: i64, v2: i64):
 return v3
 }

-; check: lg %r0, 0(%r3)
-; nextln: 0: clgr %r4, %r0 ; jgnl 1f ; csg %r0, %r4, 0(%r3) ; jglh 0b ; 1:
-; nextln: lgr %r2, %r0
-; nextln: br %r14
+; block0:
+; lg %r0, 0(%r3)
+; 0: clgr %r4, %r0 ; jgnl 1f ; csg %r0, %r4, 0(%r3) ; jglh 0b ; 1:
+; lgr %r2, %r0
+; br %r14

 function %atomic_rmw_umin_i32(i64, i64, i32) -> i32 {
 block0(v0: i64, v1: i64, v2: i32):
@@ -489,10 +490,11 @@ block0(v0: i64, v1: i64, v2: i32):
 return v3
 }

-; check: l %r0, 0(%r3)
-; nextln: 0: clr %r4, %r0 ; jgnl 1f ; cs %r0, %r4, 0(%r3) ; jglh 0b ; 1:
-; nextln: lr %r2, %r0
-; nextln: br %r14
+; block0:
+; l %r0, 0(%r3)
+; 0: clr %r4, %r0 ; jgnl 1f ; cs %r0, %r4, 0(%r3) ; jglh 0b ; 1:
+; lgr %r2, %r0
+; br %r14

 function %atomic_rmw_umin_i16(i64, i64, i16) -> i16 {
 block0(v0: i64, v1: i64, v2: i16):
@@ -500,13 +502,14 @@ block0(v0: i64, v1: i64, v2: i16):
 return v3
 }

-; check: sllk %r2, %r3, 3
-; nextln: nill %r3, 65532
-; nextln: sllk %r4, %r4, 16
-; nextln: l %r0, 0(%r3)
-; nextln: 0: rll %r1, %r0, 0(%r2) ; clr %r4, %r1 ; jgnl 1f ; risbgn %r1, %r4, 32, 48, 0 ; rll %r1, %r1, 0(%r2) ; cs %r0, %r1, 0(%r3) ; jglh 0b ; 1:
-; nextln: rll %r2, %r0, 16(%r2)
-; nextln: br %r14
+; block0:
+; sllk %r5, %r3, 3
+; nill %r3, 65532
+; sllk %r2, %r4, 16
+; l %r0, 0(%r3)
+; 0: rll %r1, %r0, 0(%r5) ; clr %r2, %r1 ; jgnl 1f ; risbgn %r1, %r2, 32, 48, 0 ; rll %r1, %r1, 0(%r5) ; cs %r0, %r1, 0(%r3) ; jglh 0b ; 1:
+; rll %r2, %r0, 16(%r5)
+; br %r14

 function %atomic_rmw_umin_i8(i64, i64, i8) -> i8 {
 block0(v0: i64, v1: i64, v2: i8):
@@ -514,18 +517,15 @@ block0(v0: i64, v1: i64, v2: i8):
 return v3
 }

-; check: sllk %r2, %r3, 3
-; nextln: nill %r3, 65532
-; nextln: sllk %r4, %r4, 24
-; nextln: lcr %r5, %r2
-; nextln: l %r0, 0(%r3)
-; nextln: 0: rll %r1, %r0, 0(%r2) ; clr %r4, %r1 ; jgnl 1f ; risbgn %r1, %r4, 32, 40, 0 ; rll %r1, %r1, 0(%r5) ; cs %r0, %r1, 0(%r3) ; jglh 0b ; 1:
-; nextln: rll %r2, %r0, 8(%r2)
-; nextln: br %r14
-
-;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-;; ATOMIC_RMW (UMAX)
-;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+; block0:
+; sllk %r5, %r3, 3
+; nill %r3, 65532
+; sllk %r2, %r4, 24
+; lcr %r4, %r5
+; l %r0, 0(%r3)
+; 0: rll %r1, %r0, 0(%r5) ; clr %r2, %r1 ; jgnl 1f ; risbgn %r1, %r2, 32, 40, 0 ; rll %r1, %r1, 0(%r4) ; cs %r0, %r1, 0(%r3) ; jglh 0b ; 1:
+; rll %r2, %r0, 8(%r5)
+; br %r14

 function %atomic_rmw_umax_i64(i64, i64, i64) -> i64 {
 block0(v0: i64, v1: i64, v2: i64):
@@ -533,10 +533,11 @@ block0(v0: i64, v1: i64, v2: i64):
 return v3
 }

-; check: lg %r0, 0(%r3)
-; nextln: 0: clgr %r4, %r0 ; jgnh 1f ; csg %r0, %r4, 0(%r3) ; jglh 0b ; 1:
-; nextln: lgr %r2, %r0
-; nextln: br %r14
+; block0:
+; lg %r0, 0(%r3)
+; 0: clgr %r4, %r0 ; jgnh 1f ; csg %r0, %r4, 0(%r3) ; jglh 0b ; 1:
+; lgr %r2, %r0
+; br %r14

 function %atomic_rmw_umax_i32(i64, i64, i32) -> i32 {
 block0(v0: i64, v1: i64, v2: i32):
@@ -544,10 +545,11 @@ block0(v0: i64, v1: i64, v2: i32):
 return v3
 }

-; check: l %r0, 0(%r3)
-; nextln: 0: clr %r4, %r0 ; jgnh 1f ; cs %r0, %r4, 0(%r3) ; jglh 0b ; 1:
-; nextln: lr %r2, %r0
-; nextln: br %r14
+; block0:
+; l %r0, 0(%r3)
+; 0: clr %r4, %r0 ; jgnh 1f ; cs %r0, %r4, 0(%r3) ; jglh 0b ; 1:
+; lgr %r2, %r0
+; br %r14

 function %atomic_rmw_umax_i16(i64, i64, i16) -> i16 {
 block0(v0: i64, v1: i64, v2: i16):
@@ -555,13 +557,14 @@ block0(v0: i64, v1: i64, v2: i16):
 return v3
 }

-; check: sllk %r2, %r3, 3
-; nextln: nill %r3, 65532
-; nextln: sllk %r4, %r4, 16
-; nextln: l %r0, 0(%r3)
-; nextln: 0: rll %r1, %r0, 0(%r2) ; clr %r4, %r1 ; jgnh 1f ; risbgn %r1, %r4, 32, 48, 0 ; rll %r1, %r1, 0(%r2) ; cs %r0, %r1, 0(%r3) ; jglh 0b ; 1:
-; nextln: rll %r2, %r0, 16(%r2)
-; nextln: br %r14
+; block0:
+; sllk %r5, %r3, 3
+; nill %r3, 65532
+; sllk %r2, %r4, 16
+; l %r0, 0(%r3)
+; 0: rll %r1, %r0, 0(%r5) ; clr %r2, %r1 ; jgnh 1f ; risbgn %r1, %r2, 32, 48, 0 ; rll %r1, %r1, 0(%r5) ; cs %r0, %r1, 0(%r3) ; jglh 0b ; 1:
+; rll %r2, %r0, 16(%r5)
+; br %r14

 function %atomic_rmw_umax_i8(i64, i64, i8) -> i8 {
 block0(v0: i64, v1: i64, v2: i8):
@@ -569,12 +572,13 @@ block0(v0: i64, v1: i64, v2: i8):
 return v3
 }

-; check: sllk %r2, %r3, 3
-; nextln: nill %r3, 65532
-; nextln: sllk %r4, %r4, 24
-; nextln: lcr %r5, %r2
-; nextln: l %r0, 0(%r3)
-; nextln: 0: rll %r1, %r0, 0(%r2) ; clr %r4, %r1 ; jgnh 1f ; risbgn %r1, %r4, 32, 40, 0 ; rll %r1, %r1, 0(%r5) ; cs %r0, %r1, 0(%r3) ; jglh 0b ; 1:
-; nextln: rll %r2, %r0, 8(%r2)
-; nextln: br %r14
+; block0:
+; sllk %r5, %r3, 3
+; nill %r3, 65532
+; sllk %r2, %r4, 24
+; lcr %r4, %r5
+; l %r0, 0(%r3)
+; 0: rll %r1, %r0, 0(%r5) ; clr %r2, %r1 ; jgnh 1f ; risbgn %r1, %r2, 32, 40, 0 ; rll %r1, %r1, 0(%r4) ; cs %r0, %r1, 0(%r3) ; jglh 0b ; 1:
+; rll %r2, %r0, 8(%r5)
+; br %r14
diff --git a/cranelift/filetests/filetests/isa/s390x/atomic_store-little.clif b/cranelift/filetests/filetests/isa/s390x/atomic_store-little.clif
index 176a988f68..1f83d1e81e 100644
--- a/cranelift/filetests/filetests/isa/s390x/atomic_store-little.clif
+++ b/cranelift/filetests/filetests/isa/s390x/atomic_store-little.clif
@@ -1,4 +1,4 @@
-test compile
+test compile precise-output
 target s390x

 function %atomic_store_i64(i64, i64) {
@@ -7,9 +7,10 @@ block0(v0: i64, v1: i64):
 return
 }

-; check: strvg %r2, 0(%r3)
-; nextln: bcr 14, 0
-; nextln: br %r14
+; block0:
+; strvg %r2, 0(%r3)
+; bcr 14, 0
+; br %r14

 function %atomic_store_i64_sym(i64) {
 gv0 = symbol colocated %sym
@@ -19,9 +20,10 @@ block0(v0: i64):
 return
 }

-; check: larl %r1, %sym + 0 ; strvg %r2, 0(%r1)
-; nextln: bcr 14, 0
-; nextln: br %r14
+; block0:
+; larl %r1, %sym + 0 ; strvg %r2, 0(%r1)
+; bcr 14, 0
+; br %r14

 function %atomic_store_imm_i64(i64) {
 block0(v0: i64):
@@ -30,10 +32,11 @@ block0(v0: i64):
 return
 }

-; check: lghi %r3, 12345
-; nextln: strvg %r3, 0(%r2)
-; nextln: bcr 14, 0
-; nextln: br %r14
+; block0:
+; lghi %r4, 12345
+; strvg %r4, 0(%r2)
+; bcr 14, 0
+; br %r14

 function %atomic_store_i32(i32, i64) {
 block0(v0: i32, v1: i64):
@@ -41,9 +44,10 @@ block0(v0: i32, v1: i64):
 return
 }

-; check: strv %r2, 0(%r3)
-; nextln: bcr 14, 0
-; nextln: br %r14
+; block0:
+; strv %r2, 0(%r3)
+; bcr 14, 0
+; br %r14

 function %atomic_store_i32_sym(i32) {
 gv0 = symbol colocated %sym
@@ -53,9 +57,10 @@ block0(v0: i32):
 return
 }

-; check: larl %r1, %sym + 0 ; strv %r2, 0(%r1)
-; nextln: bcr 14, 0
-; nextln: br %r14
+; block0:
+; larl %r1, %sym + 0 ; strv %r2, 0(%r1)
+; bcr 14, 0
+; br %r14

 function %atomic_store_imm_i32(i64) {
 block0(v0: i64):
@@ -64,10 +69,11 @@ block0(v0: i64):
 return
 }

-; check: lhi %r3, 12345
-; nextln: strv %r3, 0(%r2)
-; nextln: bcr 14, 0
-; nextln: br %r14
+; block0:
+; lhi %r4, 12345
+; strv %r4, 0(%r2)
+; bcr 14, 0
+; br %r14

 function %atomic_store_i16(i16, i64) {
 block0(v0: i16, v1: i64):
@@ -75,9 +81,10 @@ block0(v0: i16, v1: i64):
 return
 }

-; check: strvh %r2, 0(%r3)
-; nextln: bcr 14, 0
-; nextln: br %r14
+; block0:
+; strvh %r2, 0(%r3)
+; bcr 14, 0
+; br %r14

 function %atomic_store_i16_sym(i16) {
 gv0 = symbol colocated %sym
@@ -87,9 +94,10 @@ block0(v0: i16):
 return
 }

-; check: larl %r1, %sym + 0 ; strvh %r2, 0(%r1)
-; nextln: bcr 14, 0
-; nextln: br %r14
+; block0:
+; larl %r1, %sym + 0 ; strvh %r2, 0(%r1)
+; bcr 14, 0
+; br %r14

 function %atomic_store_imm_i16(i64) {
 block0(v0: i64):
@@ -98,9 +106,10 @@ block0(v0: i64):
 return
 }

-; check: mvhhi 0(%r2), 14640
-; nextln: bcr 14, 0
-; nextln: br %r14
+; block0:
+; mvhhi 0(%r2), 14640
+; bcr 14, 0
+; br %r14

 function %atomic_store_i8(i8, i64) {
 block0(v0: i8, v1: i64):
@@ -108,9 +117,10 @@ block0(v0: i8, v1: i64):
 return
 }

-; check: stc %r2, 0(%r3)
-; nextln: bcr 14, 0
-; nextln: br %r14
+; block0:
+; stc %r2, 0(%r3)
+; bcr 14, 0
+; br %r14

 function %atomic_store_imm_i8(i64) {
 block0(v0: i64):
@@ -119,7 +129,8 @@ block0(v0: i64):
 return
 }

-; check: mvi 0(%r2), 123
-; nextln: bcr 14, 0
-; nextln: br %r14
+; block0:
+; mvi 0(%r2), 123
+; bcr 14, 0
+; br %r14

diff --git a/cranelift/filetests/filetests/isa/s390x/atomic_store.clif b/cranelift/filetests/filetests/isa/s390x/atomic_store.clif
index c7cc4c1dab..f536779be3 100644
--- a/cranelift/filetests/filetests/isa/s390x/atomic_store.clif
+++ b/cranelift/filetests/filetests/isa/s390x/atomic_store.clif
@@ -1,4 +1,4 @@
-test compile
+test compile precise-output
 target s390x

 function %atomic_store_i64(i64, i64) {
@@ -7,9 +7,10 @@ block0(v0: i64, v1: i64):
 return
 }

-; check: stg %r2, 0(%r3)
-; nextln: bcr 14, 0
-; nextln: br %r14
+; block0:
+; stg %r2, 0(%r3)
+; bcr 14, 0
+; br %r14

 function %atomic_store_i64_sym(i64) {
 gv0 = symbol colocated %sym
@@ -19,9 +20,10 @@ block0(v0: i64):
 return
 }

-; check: stgrl %r2, %sym + 0
-; nextln: bcr 14, 0
-; nextln: br %r14
+; block0:
+; stgrl %r2, %sym + 0
+; bcr 14, 0
+; br %r14

 function %atomic_store_imm_i64(i64) {
 block0(v0: i64):
@@ -30,9 +32,10 @@ block0(v0: i64):
 return
 }

-; check: mvghi 0(%r2), 12345
-; nextln: bcr 14, 0
-; nextln: br %r14
+; block0:
+; mvghi 0(%r2), 12345
+; bcr 14, 0
+; br %r14

 function %atomic_store_i32(i32, i64) {
 block0(v0: i32, v1: i64):
@@ -40,9 +43,10 @@ block0(v0: i32, v1: i64):
 return
 }

-; check: st %r2, 0(%r3)
-; nextln: bcr 14, 0
-; nextln: br %r14
+; block0:
+; st %r2, 0(%r3)
+; bcr 14, 0
+; br %r14

 function %atomic_store_i32_sym(i32) {
 gv0 = symbol colocated %sym
@@ -52,9 +56,10 @@ block0(v0: i32):
 return
 }

-; check: strl %r2, %sym + 0
-; nextln: bcr 14, 0
-; nextln: br %r14
+; block0:
+; strl %r2, %sym + 0
+; bcr 14, 0
+; br %r14

 function %atomic_store_imm_i32(i64) {
 block0(v0: i64):
@@ -63,9 +68,10 @@ block0(v0: i64):
 return
 }

-; check: mvhi 0(%r2), 12345
-; nextln: bcr 14, 0
-; nextln: br %r14
+; block0:
+; mvhi 0(%r2), 12345
+; bcr 14, 0
+; br %r14

 function %atomic_store_i16(i16, i64) {
 block0(v0: i16, v1: i64):
@@ -73,9 +79,10 @@ block0(v0: i16, v1: i64):
 return
 }

-; check: sth %r2, 0(%r3)
-; nextln: bcr 14, 0
-; nextln: br %r14
+; block0:
+; sth %r2, 0(%r3)
+; bcr 14, 0
+; br %r14

 function %atomic_store_i16_sym(i16) {
 gv0 = symbol colocated %sym
@@ -85,9 +92,10 @@ block0(v0: i16):
 return
 }

-; check: sthrl %r2, %sym + 0
-; nextln: bcr 14, 0
-; nextln: br %r14
+; block0:
+; sthrl %r2, %sym + 0
+; bcr 14, 0
+; br %r14

 function %atomic_store_imm_i16(i64) {
 block0(v0: i64):
@@ -96,9 +104,10 @@ block0(v0: i64):
 return
 }

-; check: mvhhi 0(%r2), 12345
-; nextln: bcr 14, 0
-; nextln: br %r14
+; block0:
+; mvhhi 0(%r2), 12345
+; bcr 14, 0
+; br %r14

 function %atomic_store_i8(i8, i64) {
 block0(v0: i8, v1: i64):
@@ -106,9 +115,10 @@ block0(v0: i8, v1: i64):
 return
 }

-; check: stc %r2, 0(%r3)
-; nextln: bcr 14, 0
-; nextln: br %r14
+; block0:
+; stc %r2, 0(%r3)
+; bcr 14, 0
+; br %r14

 function %atomic_store_imm_i8(i64) {
 block0(v0: i64):
@@ -117,7 +127,8 @@ block0(v0: i64):
 return
 }

-; check: mvi 0(%r2), 123
-; nextln: bcr 14, 0
-; nextln: br %r14
+; block0:
+; mvi 0(%r2), 123
+; bcr 14, 0
+; br %r14

diff --git a/cranelift/filetests/filetests/isa/s390x/bitops-arch13.clif b/cranelift/filetests/filetests/isa/s390x/bitops-arch13.clif
index 4350ab053d..e06b019347 100644
--- a/cranelift/filetests/filetests/isa/s390x/bitops-arch13.clif
+++ b/cranelift/filetests/filetests/isa/s390x/bitops-arch13.clif
@@ -1,4 +1,4 @@
-test compile
+test compile precise-output
 target s390x arch13

 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
@@ -11,8 +11,9 @@ block0(v0: i64):
 return v1
 }

-; check: popcnt %r2, %r2, 8
-; nextln: br %r14
+; block0:
+; popcnt %r2, %r2, 8
+; br %r14

 function %popcnt_i32(i32) -> i32 {
 block0(v0: i32):
@@ -20,9 +21,10 @@ block0(v0: i32):
 return v1
 }

-; check: llgfr %r2, %r2
-; nextln: popcnt %r2, %r2, 8
-; nextln: br %r14
+; block0:
+; llgfr %r5, %r2
+; popcnt %r2, %r5, 8
+; br %r14

 function %popcnt_i16(i16) -> i16 {
 block0(v0: i16):
@@ -30,9 +32,10 @@ block0(v0: i16):
 return v1
 }

-; check: llghr %r2, %r2
-; nextln: popcnt %r2, %r2, 8
-; nextln: br %r14
+; block0:
+; llghr %r5, %r2
+; popcnt %r2, %r5, 8
+; br %r14

 function %popcnt_i8(i8) -> i8 {
 block0(v0: i8):
@@ -40,5 +43,7 @@ block0(v0: i8):
 return v1
 }

-; check: popcnt %r2, %r2
-; nextln: br %r14
+; block0:
+; popcnt %r2, %r2
+; br %r14
+
diff --git a/cranelift/filetests/filetests/isa/s390x/bitops.clif b/cranelift/filetests/filetests/isa/s390x/bitops.clif
index 8939a946af..444b5242e1 100644
--- a/cranelift/filetests/filetests/isa/s390x/bitops.clif
+++ b/cranelift/filetests/filetests/isa/s390x/bitops.clif
@@ -1,4 +1,4 @@
-test compile
+test compile precise-output
 target s390x

 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
@@ -42,9 +42,10 @@ block0(v0: i64):
 return v1
 }

-; check: flogr %r0, %r2
-; nextln: lgr %r2, %r0
-; nextln: br %r14
+; block0:
+; flogr %r0, %r2
+; lgr %r2, %r0
+; br %r14

 function %clz_i32(i32) -> i32 {
 block0(v0: i32):
@@ -52,10 +53,11 @@ block0(v0: i32):
 return v1
 }

-; check: llgfr %r2, %r2
-; nextln: flogr %r0, %r2
-; nextln: ahik %r2, %r0, -32
-; nextln: br %r14
+; block0:
+; llgfr %r5, %r2
+; flogr %r0, %r5
+; ahik %r2, %r0, -32
+; br %r14

 function %clz_i16(i16) -> i16 {
 block0(v0: i16):
@@ -63,10 +65,11 @@ block0(v0: i16):
 return v1
 }

-; check: llghr %r2, %r2
-; nextln: flogr %r0, %r2
-; nextln: ahik %r2, %r0, -48
-; nextln: br %r14
+; block0:
+; llghr %r5, %r2
+; flogr %r0, %r5
+; ahik %r2, %r0, -48
+; br %r14

 function %clz_i8(i8) -> i8 {
 block0(v0: i8):
@@ -74,14 +77,11 @@ block0(v0: i8):
 return v1
 }

-; check: llgcr %r2, %r2
-; nextln: flogr %r0, %r2
-; nextln: ahik %r2, %r0, -56
-; nextln: br %r14
-
-;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-;; CLS
-;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+; block0:
+; llgcr %r5, %r2
+; flogr %r0, %r5
+; ahik %r2, %r0, -56
+; br %r14

 function %cls_i64(i64) -> i64 {
 block0(v0: i64):
@@ -89,11 +89,12 @@ block0(v0: i64):
 return v1
 }

-; check: srag %r3, %r2, 63
-; nextln: xgr %r2, %r3
-; nextln: flogr %r0, %r2
-; nextln: lgr %r2, %r0
-; nextln: br %r14
+; block0:
+; srag %r5, %r2, 63
+; xgrk %r3, %r2, %r5
+; flogr %r0, %r3
+; lgr %r2, %r0
+; br %r14

 function %cls_i32(i32) -> i32 {
 block0(v0: i32):
@@ -101,12 +102,13 @@ block0(v0: i32):
 return v1
 }

-; check: lgfr %r2, %r2
-; nextln: srag %r3, %r2, 63
-; nextln: xgr %r2, %r3
-; nextln: flogr %r0, %r2
-; nextln: ahik %r2, %r0, -32
-; nextln: br %r14
+; block0:
+; lgfr %r5, %r2
+; srag %r3, %r5, 63
+; xgr %r5, %r3
+; flogr %r0, %r5
+; ahik %r2, %r0, -32
+; br %r14

 function %cls_i16(i16) -> i16 {
 block0(v0: i16):
@@ -114,12 +116,13 @@ block0(v0: i16):
 return v1
 }

-; check: lghr %r2, %r2
-; nextln: srag %r3, %r2, 63
-; nextln: xgr %r2, %r3
-; nextln: flogr %r0, %r2
-; nextln: ahik %r2, %r0, -48
-; nextln: br %r14
+; block0:
+; lghr %r5, %r2
+; srag %r3, %r5, 63
+; xgr %r5, %r3
+; flogr %r0, %r5
+; ahik %r2, %r0, -48
+; br %r14

 function %cls_i8(i8) -> i8 {
 block0(v0: i8):
@@ -127,16 +130,13 @@ block0(v0: i8):
 return v1
 }

-; check: lgbr %r2, %r2
-; nextln: srag %r3, %r2, 63
-; nextln: xgr %r2, %r3
-; nextln: flogr %r0, %r2
-; nextln: ahik %r2, %r0, -56
-; nextln: br %r14
-
-;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-;; CTZ
-;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+; block0:
+; lgbr %r5, %r2
+; srag %r3, %r5, 63
+; xgr %r5, %r3
+; flogr %r0, %r5
+; ahik %r2, %r0, -56
+; br %r14

 function %ctz_i64(i64) -> i64 {
 block0(v0: i64):
@@ -144,13 +144,14 @@ block0(v0: i64):
 return v1
 }

-; check: lcgr %r3, %r2
-; nextln: ngr %r2, %r3
-; nextln: flogr %r0, %r2
-; nextln: locghie %r0, -1
-; nextln: lghi %r2, 63
-; nextln: sgr %r2, %r0
-; nextln: br %r14
+; block0:
+; lcgr %r5, %r2
+; ngrk %r3, %r2, %r5
+; flogr %r0, %r3
+; locghie %r0, -1
+; lghi %r3, 63
+; sgrk %r2, %r3, %r0
+; br %r14

 function %ctz_i32(i32) -> i32 {
 block0(v0: i32):
@@ -158,13 +159,14 @@ block0(v0: i32):
 return v1
 }

-; check: oihl %r2, 1
-; nextln: lcgr %r3, %r2
-; nextln: ngr %r2, %r3
-; nextln: flogr %r0, %r2
-; nextln: lhi %r2, 63
-; nextln: sr %r2, %r0
-; nextln: br %r14
+; block0:
+; oihl %r2, 1
+; lcgr %r4, %r2
+; ngr %r2, %r4
+; flogr %r0, %r2
+; lhi %r5, 63
+; srk %r2, %r5, %r0
+; br %r14

 function %ctz_i16(i16) -> i16 {
 block0(v0: i16):
@@ -172,13 +174,14 @@ block0(v0: i16):
 return v1
 }

-; check: oilh %r2, 1
-; nextln: lcgr %r3, %r2
-; nextln: ngr %r2, %r3
-; nextln: flogr %r0, %r2
-; nextln: lhi %r2, 63
-; nextln: sr %r2, %r0
-; nextln: br %r14
+; block0:
+; oilh %r2, 1
+; lcgr %r4, %r2
+; ngr %r2, %r4
+; flogr %r0, %r2
+; lhi %r5, 63
+; srk %r2, %r5, %r0
+; br %r14

 function %ctz_i8(i8) -> i8 {
 block0(v0: i8):
@@ -186,17 +189,14 @@ block0(v0: i8):
 return v1
 }

-; check: oill %r2, 256
-; nextln: lcgr %r3, %r2
-; nextln: ngr %r2, %r3
-; nextln: flogr %r0, %r2
-; nextln: lhi %r2, 63
-; nextln: sr %r2, %r0
-; nextln: br %r14
-
-;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-;; POPCNT
-;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+; block0:
+; oill %r2, 256
+; lcgr %r4, %r2
+; ngr %r2, %r4
+; flogr %r0, %r2
+; lhi %r5, 63
+; srk %r2, %r5, %r0
+; br %r14

 function %popcnt_i64(i64) -> i64 {
 block0(v0: i64):
@@ -204,15 +204,16 @@ block0(v0: i64):
 return v1
 }

-; check: popcnt %r2, %r2
-; nextln: sllg %r3, %r2, 32
-; nextln: agr %r2, %r3
-; nextln: sllg %r3, %r2, 16
-; nextln: agr %r2, %r3
-; nextln: sllg %r3, %r2, 8
-; nextln: agr %r2, %r3
-; nextln: srlg %r2, %r2, 56
-; nextln: br %r14
+; block0:
+; popcnt %r5, %r2
+; sllg %r3, %r5, 32
+; agr %r5, %r3
+; sllg %r3, %r5, 16
+; agr %r5, %r3
+; sllg %r3, %r5, 8
+; agr %r5, %r3
+; srlg %r2, %r5, 56
+; br %r14

 function %popcnt_i32(i32) -> i32 {
 block0(v0: i32):
@@ -220,13 +221,14 @@ block0(v0: i32):
 return v1
 }

-; check: popcnt %r2, %r2
-; nextln: sllk %r3, %r2, 16
-; nextln: ar %r2, %r3
-; nextln: sllk %r3, %r2, 8
-; nextln: ar %r2, %r3
-; nextln: srlk %r2, %r2, 24
-; nextln: br %r14
+; block0:
+; popcnt %r5, %r2
+; sllk %r3, %r5, 16
+; ar %r5, %r3
+; sllk %r3, %r5, 8
+; ar %r5, %r3
+; srlk %r2, %r5, 24
+; br %r14

 function %popcnt_i16(i16) -> i16 {
 block0(v0: i16):
@@ -234,11 +236,12 @@ block0(v0: i16):
 return v1
 }

-; check: popcnt %r2, %r2
-; nextln: sllk %r3, %r2, 8
-; nextln: ar %r2, %r3
-; nextln: srlk %r2, %r2, 8
-; nextln: br %r14
+; block0:
+; popcnt %r5, %r2
+; sllk %r3, %r5, 8
+; ar %r5, %r3
+; srlk %r2, %r5, 8
+; br %r14

 function %popcnt_i8(i8) -> i8 {
 block0(v0: i8):
@@ -246,5 +249,7 @@ block0(v0: i8):
 return v1
 }

-; check: popcnt %r2, %r2
-; nextln: br %r14
+; block0:
+; popcnt %r2, %r2
+; br %r14
+
diff --git a/cranelift/filetests/filetests/isa/s390x/bitwise-arch13.clif b/cranelift/filetests/filetests/isa/s390x/bitwise-arch13.clif
index 5630fee0dc..f0298ce69c 100644
--- a/cranelift/filetests/filetests/isa/s390x/bitwise-arch13.clif
+++ b/cranelift/filetests/filetests/isa/s390x/bitwise-arch13.clif
@@ -1,5 +1,5 @@
-test compile
+test compile precise-output
 target s390x arch13

 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
@@ -12,8 +12,9 @@ block0(v0: i64, v1: i64):
 return v2
 }

-; check: nngrk %r2, %r2, %r3
-; nextln: br %r14
+; block0:
+; nngrk %r2, %r2, %r3
+; br %r14

 function %band_not_i32(i32, i32) -> i32 {
 block0(v0: i32, v1: i32):
@@ -21,8 +22,9 @@ block0(v0: i32, v1: i32):
 return v2
 }

-; check: nnrk %r2, %r2, %r3
-; nextln: br %r14
+; block0:
+; nnrk %r2, %r2, %r3
+; br %r14

 function %band_not_i16(i16, i16) -> i16 {
 block0(v0: i16, v1: i16):
@@ -30,8 +32,9 @@ block0(v0: i16, v1: i16):
 return v2
 }

-; check: nnrk %r2, %r2, %r3
-; nextln: br %r14
+; block0:
+; nnrk %r2, %r2, %r3
+; br %r14

 function %band_not_i8(i8, i8) -> i8 {
 block0(v0: i8, v1: i8):
@@ -39,12 +42,9 @@ block0(v0: i8, v1: i8):
 return v2
 }

-; check: nnrk %r2, %r2, %r3
-; nextln: br %r14
-
-;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-;; BOR_NOT
-;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+; block0:
+; nnrk %r2, %r2, %r3
+; br %r14

 function %bor_not_i64(i64, i64) -> i64 {
 block0(v0: i64, v1: i64):
@@ -52,8 +52,9 @@ block0(v0: i64, v1: i64):
 return v2
 }

-; check: nogrk %r2, %r2, %r3
-; nextln: br %r14
+; block0:
+; nogrk %r2, %r2, %r3
+; br %r14

 function %bor_not_i32(i32, i32) -> i32 {
 block0(v0: i32, v1: i32):
@@ -61,8 +62,9 @@ block0(v0: i32, v1: i32):
 return v2
 }

-; check: nork %r2, %r2, %r3
-; nextln: br %r14
+; block0:
+; nork %r2, %r2, %r3
+; br %r14

 function %bor_not_i16(i16, i16) -> i16 {
 block0(v0: i16, v1: i16):
@@ -70,8 +72,9 @@ block0(v0: i16, v1: i16):
 return v2
 }

-; check: nork %r2, %r2, %r3
-; nextln: br %r14
+; block0:
+; nork %r2, %r2, %r3
+; br %r14

 function %bor_not_i8(i8, i8) -> i8 {
 block0(v0: i8, v1: i8):
@@ -79,12 +82,9 @@ block0(v0: i8, v1: i8):
 return v2
 }

-; check: nork %r2, %r2, %r3
-; nextln: br %r14
-
-;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-;; BXOR_NOT
-;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+; block0:
+; nork %r2, %r2, %r3
+; br %r14

 function %bxor_not_i64(i64, i64) -> i64 {
 block0(v0: i64, v1: i64):
@@ -92,8 +92,9 @@ block0(v0: i64, v1: i64):
 return v2
 }

-; check: nxgrk %r2, %r2, %r3
-; nextln: br %r14
+; block0:
+; nxgrk %r2, %r2, %r3
+; br %r14

 function %bxor_not_i32(i32, i32) -> i32 {
 block0(v0: i32, v1: i32):
@@ -101,8 +102,9 @@ block0(v0: i32, v1: i32):
 return v2
 }

-; check: nxrk %r2, %r2, %r3
-; nextln: br %r14
+; block0:
+; nxrk %r2, %r2, %r3
+; br %r14

 function %bxor_not_i16(i16, i16) -> i16 {
 block0(v0: i16, v1: i16):
@@ -110,8 +112,9 @@ block0(v0: i16, v1: i16):
 return v2
 }

-; check: nxrk %r2, %r2, %r3
-; nextln: br %r14
+; block0:
+; nxrk %r2, %r2, %r3
+; br %r14

 function %bxor_not_i8(i8, i8) -> i8 {
 block0(v0: i8, v1: i8):
@@ -119,12 +122,9 @@ block0(v0: i8, v1: i8):
 return v2
 }

-; check: nxrk %r2, %r2, %r3
-; nextln: br %r14
-
-;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-;; BNOT
-;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+; block0:
+; nxrk %r2, %r2, %r3
+; br %r14

 function %bnot_i64(i64) -> i64 {
 block0(v0: i64):
@@ -132,8 +132,9 @@ block0(v0: i64):
 return v1
 }

-; check: nogrk %r2, %r2, %r2
-; nextln: br %r14
+; block0:
+; nogrk %r2, %r2, %r2
+; br %r14

 function %bnot_i32(i32) -> i32 {
 block0(v0: i32):
@@ -141,8 +142,9 @@ block0(v0: i32):
 return v1
 }

-; check: nork %r2, %r2, %r2
-; nextln: br %r14
+; block0:
+; nork %r2, %r2, %r2
+; br %r14

 function %bnot_i16(i16) -> i16 {
 block0(v0: i16):
@@ -150,8 +152,9 @@ block0(v0: i16):
 return v1
 }

-; check: nork %r2, %r2, %r2
-; nextln: br %r14
+; block0:
+; nork %r2, %r2, %r2
+; br %r14

 function %bnot_i8(i8) -> i8 {
 block0(v0: i8):
@@ -159,12 +162,9 @@ block0(v0: i8):
 return v1
 }

-; check: nork %r2, %r2, %r2
-; nextln: br %r14
-
-;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-;; BITSELECT
-;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+; block0:
+; nork %r2, %r2, %r2
+; br %r14

 function %bitselect_i64(i64, i64, i64) -> i64 {
 block0(v0: i64, v1: i64, v2: i64):
@@ -172,10 +172,11 @@ block0(v0: i64, v1: i64, v2: i64):
 return v3
 }

-; check: ngr %r3, %r2
-; nextln: nngrk %r2, %r4, %r2
-; nextln: ogr %r2, %r3
-; nextln: br %r14
+; block0:
+; ngrk %r5, %r3, %r2
+; nngrk %r3, %r4, %r2
+; ogrk %r2, %r3, %r5
+; br %r14

 function %bitselect_i32(i32, i32, i32) -> i32 {
 block0(v0: i32, v1: i32, v2: i32):
@@ -183,10 +184,11 @@ block0(v0: i32, v1: i32, v2: i32):
 return v3
 }

-; check: nr %r3, %r2
-; nextln: nnrk %r2, %r4, %r2
-; nextln: or %r2, %r3
-; nextln: br %r14
+; block0:
+; nrk %r5, %r3, %r2
+; nnrk %r3, %r4, %r2
+; ork %r2, %r3, %r5
+; br %r14

 function %bitselect_i16(i16, i16, i16) -> i16 {
 block0(v0: i16, v1: i16, v2: i16):
@@ -194,10 +196,11 @@ block0(v0: i16, v1: i16, v2: i16):
 return v3
 }

-; check: nr %r3, %r2
-; nextln: nnrk %r2, %r4, %r2
-; nextln: or %r2, %r3
-; nextln: br %r14
+; block0:
+; nrk %r5, %r3, %r2
+; nnrk %r3, %r4, %r2
+; ork %r2, %r3, %r5
+; br %r14

 function %bitselect_i8(i8, i8, i8) -> i8 {
 block0(v0: i8, v1: i8, v2: i8):
@@ -205,8 +208,9 @@ block0(v0: i8, v1: i8, v2: i8):
 return v3
 }

-; check: nr %r3, %r2
-; nextln: nnrk %r2, %r4, %r2
-; nextln: or %r2, %r3
-; nextln: br %r14
+; block0:
+; nrk %r5, %r3, %r2
+; nnrk %r3, %r4, %r2
+; ork %r2, %r3, %r5
+; br %r14
diff --git a/cranelift/filetests/filetests/isa/s390x/bitwise.clif b/cranelift/filetests/filetests/isa/s390x/bitwise.clif
index 205c692300..3fe6cbfd2c 100644
--- a/cranelift/filetests/filetests/isa/s390x/bitwise.clif
+++ b/cranelift/filetests/filetests/isa/s390x/bitwise.clif
@@ -1,5 +1,5 @@
-test compile
+test compile precise-output
 target s390x

 ; FIXME: add immediate operand versions

@@ -14,8 +14,9 @@ block0(v0: i64, v1: i64):
 return v2
 }

-; check: ngr %r2, %r3
-; nextln: br %r14
+; block0:
+; ngr %r2, %r3
+; br %r14

 function %band_i64_mem(i64, i64) -> i64 {
 block0(v0: i64, v1: i64):
@@ -24,8 +25,9 @@ block0(v0: i64, v1: i64):
 return v3
 }

-; check: ng %r2, 0(%r3)
-; nextln: br %r14
+; block0:
+; ng %r2, 0(%r3)
+; br %r14

 function %band_i32(i32, i32) -> i32 {
 block0(v0: i32, v1: i32):
@@ -33,8 +35,9 @@ block0(v0: i32, v1: i32):
 return v2
 }

-; check: nr %r2, %r3
-; nextln: br %r14
+; block0:
+; nr %r2, %r3
+; br %r14

 function %band_i32_mem(i32, i64) -> i32 {
 block0(v0: i32, v1: i64):
@@ -43,8 +46,9 @@ block0(v0: i32, v1: i64):
 return v3
 }

-; check: n %r2, 0(%r3)
-; nextln: br %r14
+; block0:
+; n %r2, 0(%r3)
+; br %r14

 function %band_i32_memoff(i32, i64) -> i32 {
 block0(v0: i32, v1: i64):
@@ -53,8 +57,9 @@ block0(v0: i32, v1: i64):
 return v3
 }

-; check: ny %r2, 4096(%r3)
-; nextln: br %r14
+; block0:
+; ny %r2, 4096(%r3)
+; br %r14

 function %band_i16(i16, i16) -> i16 {
 block0(v0: i16, v1: i16):
@@ -62,8 +67,9 @@ block0(v0: i16, v1: i16):
 return v2
 }

-; check: nr %r2, %r3
-; nextln: br %r14
+; block0:
+; nr %r2, %r3
+; br %r14

 function %band_i16_mem(i16, i64) -> i16 {
 block0(v0: i16, v1: i64):
@@ -72,9 +78,10 @@ block0(v0: i16, v1: i64):
 return v3
 }

-; check: llh %r3, 0(%r3)
-; nextln: nr %r2, %r3
-; nextln: br %r14
+; block0:
+; llh %r4, 0(%r3)
+; nr %r2, %r4
+; br %r14

 function %band_i8(i8, i8) -> i8 {
 block0(v0: i8, v1: i8):
@@ -82,8 +89,9 @@ block0(v0: i8, v1: i8):
 return v2
 }

-; check: nr %r2, %r3
-; nextln: br %r14
+; block0:
+; nr %r2, %r3
+; br %r14

 function %band_i8_mem(i8, i64) -> i8 {
 block0(v0: i8, v1: i64):
@@ -92,13 +100,10 @@ block0(v0: i8, v1: i64):
 return v3
 }

-; check: llc %r3, 0(%r3)
-; nextln: nr %r2, %r3
-; nextln: br %r14
-
-;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-;; BOR
-;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+; block0:
+; llc %r4, 0(%r3)
+; nr %r2, %r4
+; br %r14

 function %bor_i64(i64, i64) -> i64 {
 block0(v0: i64, v1: i64):
@@ -106,8 +111,9 @@ block0(v0: i64, v1: i64):
 return v2
 }

-; check: ogr %r2, %r3
-; nextln: br %r14
+; block0:
+; ogr %r2, %r3
+; br %r14

 function %bor_i64_mem(i64, i64) -> i64 {
 block0(v0: i64, v1: i64):
@@ -116,8 +122,9 @@ block0(v0: i64, v1: i64):
 return v3
 }

-; check: og %r2, 0(%r3)
-; nextln: br %r14
+; block0:
+; og %r2, 0(%r3)
+; br %r14

 function %bor_i32(i32, i32) -> i32 {
 block0(v0: i32, v1: i32):
@@ -125,8 +132,9 @@ block0(v0: i32, v1: i32):
 return v2
 }

-; check: or %r2, %r3
-; nextln: br %r14
+; block0:
+; or %r2, %r3
+; br %r14

 function %bor_i32_mem(i32, i64) -> i32 {
 block0(v0: i32, v1: i64):
@@ -135,8 +143,9 @@ block0(v0: i32, v1: i64):
 return v3
 }

-; check: o %r2, 0(%r3)
-; nextln: br %r14
+; block0:
+; o %r2, 0(%r3)
+; br %r14

 function %bor_i32_memoff(i32, i64) -> i32 {
 block0(v0: i32, v1: i64):
@@ -145,8 +154,9 @@ block0(v0: i32, v1: i64):
 return v3
 }

-; check: oy %r2, 4096(%r3)
-; nextln: br %r14
+; block0:
+; oy %r2, 4096(%r3)
+; br %r14

 function %bor_i16(i16, i16) -> i16 {
 block0(v0: i16, v1: i16):
@@ -154,8 +164,9 @@ block0(v0: i16, v1: i16):
 return v2
 }

-; check: or %r2, %r3
-; nextln: br %r14
+; block0:
+; or %r2, %r3
+; br %r14

 function %bor_i16_mem(i16, i64) -> i16 {
 block0(v0: i16, v1: i64):
@@ -164,9 +175,10 @@ block0(v0: i16, v1: i64):
 return v3
 }

-; check: llh %r3, 0(%r3)
-; nextln: or %r2, %r3
-; nextln: br %r14
+; block0:
+; llh %r4, 0(%r3)
+; or %r2, %r4
+; br %r14

 function %bor_i8(i8, i8) -> i8 {
 block0(v0: i8, v1: i8):
@@ -174,8 +186,9 @@ block0(v0: i8, v1: i8):
 return v2
 }

-; check: or %r2, %r3
-; nextln: br %r14
+; block0:
+; or %r2, %r3
+; br %r14

 function %bor_i8_mem(i8, i64) -> i8 {
 block0(v0: i8, v1: i64):
@@ -184,13 +197,10 @@ block0(v0: i8, v1: i64):
 return v3
 }

-; check: llc %r3, 0(%r3)
-; nextln: or %r2, %r3
-; nextln: br %r14
-
-;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-;; BXOR
-;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+; block0:
+; llc %r4, 0(%r3)
+; or %r2, %r4
+; br %r14

 function %bxor_i64(i64, i64) -> i64 {
 block0(v0: i64, v1: i64):
@@ -198,8 +208,9 @@ block0(v0: i64, v1: i64):
 return v2
 }

-; check: xgr %r2, %r3
-; nextln: br %r14
+; block0:
+; xgr %r2, %r3
+; br %r14

 function %bxor_i64_mem(i64, i64) -> i64 {
 block0(v0: i64, v1: i64):
@@ -208,8 +219,9 @@ block0(v0: i64, v1: i64):
 return v3
 }

-; check: xg %r2, 0(%r3)
-; nextln: br %r14
+; block0:
+; xg %r2, 0(%r3)
+; br %r14

 function %bxor_i32(i32, i32) -> i32 {
 block0(v0: i32, v1: i32):
@@ -217,8 +229,9 @@ block0(v0: i32, v1: i32):
 return v2
 }

-; check: xr %r2, %r3
-; nextln: br %r14
+; block0:
+; xr %r2, %r3
+; br %r14

 function %bxor_i32_mem(i32, i64) -> i32 {
 block0(v0: i32, v1: i64):
@@ -227,8 +240,9 @@ block0(v0: i32, v1: i64):
 return v3
 }

-; check: x %r2, 0(%r3)
-; nextln: br %r14
+; block0:
+; x %r2, 0(%r3)
+; br %r14

 function %bxor_i32_memoff(i32, i64) -> i32 {
 block0(v0: i32, v1: i64):
@@ -237,8 +251,9 @@ block0(v0: i32, v1: i64):
 return v3
 }

-; check: xy %r2, 4096(%r3)
-; nextln: br %r14
+; block0:
+; xy %r2, 4096(%r3)
+; br %r14

 function %bxor_i16(i16, i16) -> i16 {
 block0(v0: i16, v1: i16):
@@ -246,8 +261,9 @@ block0(v0: i16, v1: i16):
 return v2
 }

-; check: xr %r2, %r3
-; nextln: br %r14
+; block0:
+; xr %r2, %r3
+; br %r14

 function %bxor_i16_mem(i16, i64) -> i16 {
 block0(v0: i16, v1: i64):
@@ -256,9 +272,10 @@ block0(v0: i16, v1: i64):
 return v3
 }

-; check: llh %r3, 0(%r3)
-; nextln: xr %r2, %r3
-; nextln: br %r14
+; block0:
+; llh %r4, 0(%r3)
+; xr %r2, %r4
+; br %r14

 function %bxor_i8(i8, i8) -> i8 {
 block0(v0: i8, v1: i8):
@@ -266,8 +283,9 @@ block0(v0: i8, v1: i8):
 return v2
 }

-; check: xr %r2, %r3
-; nextln: br %r14
+; block0:
+; xr %r2, %r3
+; br %r14

 function %bxor_i8_mem(i8, i64) -> i8 {
 block0(v0: i8, v1: i64):
@@ -276,13 +294,10 @@ block0(v0: i8, v1: i64):
 return v3
 }

-; check: llc %r3, 0(%r3)
-; nextln: xr %r2, %r3
-; nextln: br %r14
-
-;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-;; BAND_NOT
-;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+; block0:
+; llc %r4, 0(%r3)
+; xr %r2, %r4
+; br %r14

 function %band_not_i64(i64, i64) -> i64 {
 block0(v0: i64, v1: i64):
@@ -290,10 +305,11 @@ block0(v0: i64, v1: i64):
 return v2
 }

-; check: ngr %r2, %r3
-; nextln: xilf %r2, 4294967295
-; nextln: xihf %r2, 4294967295
-; nextln: br %r14
+; block0:
+; ngr %r2, %r3
+; xilf %r2, 4294967295
+; xihf %r2, 4294967295
+; br %r14

 function %band_not_i32(i32, i32) -> i32 {
 block0(v0: i32, v1: i32):
@@ -301,9 +317,10 @@ block0(v0: i32, v1: i32):
 return v2
 }

-; check: nr %r2, %r3
-; nextln: xilf %r2, 4294967295
-; nextln: br %r14
+; block0:
+; nr %r2, %r3
+; xilf %r2, 4294967295
+; br %r14

 function %band_not_i16(i16, i16) -> i16 {
 block0(v0: i16, v1: i16):
@@ -311,9 +328,10 @@ block0(v0: i16, v1: i16):
 return v2
 }

-; check: nr %r2, %r3
-; nextln: xilf %r2, 4294967295
-; nextln: br %r14
+; block0:
+; nr %r2, %r3
+; xilf %r2, 4294967295
+; br %r14

 function %band_not_i8(i8, i8) -> i8 {
 block0(v0: i8, v1: i8):
@@ -321,13 +339,10 @@ block0(v0: i8, v1: i8):
 return v2
 }

-; check: nr %r2, %r3
-; nextln: xilf %r2, 4294967295
-; nextln: br %r14
-
-;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-;; BOR_NOT
-;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+; block0:
+; nr %r2, %r3
+; xilf %r2, 4294967295
+; br %r14

 function %bor_not_i64(i64, i64) -> i64 {
 block0(v0: i64, v1: i64):
@@ -335,10 +350,11 @@ block0(v0: i64, v1: i64):
 return v2
 }

-; check: ogr %r2, %r3
-; nextln: xilf %r2, 4294967295
-; nextln: xihf %r2, 4294967295
-; nextln: br %r14
+; block0:
+; ogr %r2, %r3
+; xilf %r2, 4294967295
+; xihf %r2, 4294967295
+; br %r14

 function %bor_not_i32(i32, i32) -> i32 {
 block0(v0: i32, v1: i32):
@@ -346,9 +362,10 @@ block0(v0: i32, v1: i32):
 return v2
 }

-; check: or %r2, %r3
-; nextln: xilf %r2, 4294967295
-; nextln: br %r14
+; block0:
+; or %r2, %r3
+; xilf %r2, 4294967295
+; br %r14

 function %bor_not_i16(i16, i16) -> i16 {
 block0(v0: i16, v1: i16):
@@ -356,9 +373,10 @@ block0(v0: i16, v1: i16):
 return v2
 }

-; check: or %r2, %r3
-; nextln: xilf %r2, 4294967295
-; nextln: br %r14
+; block0:
+; or %r2, %r3
+; xilf %r2, 4294967295
+; br %r14

 function %bor_not_i8(i8, i8) -> i8 {
 block0(v0: i8, v1: i8):
@@ -366,13 +384,10 @@ block0(v0: i8, v1: i8):
 return v2
 }

-; check: or %r2, %r3
-; nextln: xilf %r2, 4294967295
-; nextln: br %r14
-
-;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-;; BXOR_NOT
-;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+; block0:
+; or %r2, %r3
+; xilf %r2, 4294967295
+; br %r14

 function %bxor_not_i64(i64, i64) -> i64 {
 block0(v0: i64, v1: i64):
@@ -380,10 +395,11 @@ block0(v0: i64, v1: i64):
 return v2
 }

-; check: xgr %r2, %r3
-; nextln: xilf %r2, 4294967295
-; nextln: xihf %r2, 4294967295
-; nextln: br %r14
+; block0:
+; xgr %r2, %r3
+; xilf %r2, 4294967295
+; xihf %r2, 4294967295
+; br %r14

 function %bxor_not_i32(i32, i32) -> i32 {
 block0(v0: i32, v1: i32):
@@ -391,9 +407,10 @@ block0(v0: i32, v1: i32):
 return v2
 }

-; check: xr %r2, %r3
-; nextln: xilf %r2, 4294967295
-; nextln: br %r14
+; block0:
+; xr %r2, %r3
+; xilf %r2, 4294967295
+; br %r14

 function %bxor_not_i16(i16, i16) -> i16 {
 block0(v0: i16, v1: i16):
@@ -401,9 +418,10 @@ block0(v0: i16, v1: i16):
 return v2
 }

-; check: xr %r2, %r3
-; nextln: xilf %r2, 4294967295
-; nextln: br %r14
+; block0:
+; xr %r2, %r3
+; xilf %r2, 4294967295
+; br %r14

 function %bxor_not_i8(i8, i8) -> i8 {
 block0(v0: i8, v1: i8):
@@ -411,13 +429,10 @@ block0(v0: i8, v1: i8):
 return v2
 }

-; check: xr %r2, %r3
-; nextln: xilf %r2, 4294967295
-; nextln: br %r14
-
-;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-;; BNOT
-;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+; block0:
+; xr %r2, %r3
+; xilf %r2, 4294967295
+; br %r14

 function %bnot_i64(i64) -> i64 {
 block0(v0: i64):
@@ -425,9 +440,10 @@ block0(v0: i64):
 return v1
 }

-; check: xilf %r2, 4294967295
-; nextln: xihf %r2, 4294967295
-; nextln: br %r14
+; block0:
+; xilf %r2, 4294967295
+; xihf %r2, 4294967295
+; br %r14

 function %bnot_i32(i32) -> i32 {
 block0(v0: i32):
@@ -435,8 +451,9 @@ block0(v0: i32):
 return v1
 }

-; check: xilf %r2, 4294967295
-; nextln: br %r14
+; block0:
+; xilf %r2, 4294967295
+; br %r14

 function %bnot_i16(i16) -> i16 {
 block0(v0: i16):
@@ -444,8 +461,9 @@ block0(v0: i16):
 return v1
 }

-; check: xilf %r2, 4294967295
-; nextln: br %r14
+; block0:
+; xilf %r2, 4294967295
+; br %r14

 function %bnot_i8(i8) -> i8 {
 block0(v0: i8):
@@ -453,12 +471,9 @@ block0(v0: i8):
 return v1
 }

-; check: xilf %r2, 4294967295
-; nextln: br %r14
-
-;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-;; BITSELECT
-;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+; block0:
+; xilf %r2, 4294967295
+; br %r14

 function %bitselect_i64(i64, i64, i64) -> i64 {
 block0(v0: i64, v1: i64, v2: i64):
@@ -466,12 +481,13 @@ block0(v0: i64, v1: i64, v2: i64):
 return v3
 }

-; check: ngr %r3, %r2
-; nextln: ngrk %r2, %r4, %r2
-; nextln: xilf %r2, 4294967295
-; nextln: xihf %r2, 4294967295
-; nextln: ogr %r2, %r3
-; nextln: br %r14
+; block0:
+; ngrk %r5, %r3, %r2
+; ngrk %r3, %r4, %r2
+; xilf %r3, 4294967295
+; xihf %r3, 4294967295
+; ogrk %r2, %r3, %r5
+; br %r14

 function %bitselect_i32(i32, i32, i32) -> i32 {
 block0(v0: i32, v1: i32, v2: i32):
@@ -479,11 +495,12 @@ block0(v0: i32, v1: i32, v2: i32):
 return v3
 }

-; check: nr %r3, %r2
-; nextln: nrk %r2, %r4, %r2
-; nextln: xilf %r2, 4294967295
-; nextln: or %r2, %r3
-; nextln: br %r14
+; block0:
+; nrk %r5, %r3, %r2
+; nrk %r3, %r4, %r2
+; xilf %r3, 4294967295
+; ork %r2, %r3, %r5
+; br %r14

 function %bitselect_i16(i16, i16, i16) -> i16 {
 block0(v0: i16, v1: i16, v2: i16):
@@ -491,11 +508,12 @@ block0(v0: i16, v1: i16, v2: i16):
 return v3
 }

-; check: nr %r3, %r2
-; nextln: nrk %r2, %r4, %r2
-; nextln: xilf %r2, 4294967295
-; nextln: or %r2, %r3
-; nextln: br %r14
+; block0:
+; nrk %r5, %r3, %r2
+; nrk %r3, %r4, %r2
+; xilf %r3, 4294967295
+; ork %r2, %r3, %r5
+; br %r14

 function %bitselect_i8(i8, i8, i8) -> i8 {
 block0(v0: i8, v1: i8, v2: i8):
@@ -503,9 +521,10 @@ block0(v0: i8, v1: i8, v2: i8):
 return v3
 }

-; check: nr %r3, %r2
-; nextln: nrk %r2, %r4, %r2
-; nextln: xilf %r2, 4294967295
-; nextln: or %r2, %r3
-; nextln: br %r14
+; block0:
+; nrk %r5, %r3, %r2
+; nrk %r3, %r4, %r2
+; xilf %r3, 4294967295
+; ork %r2, %r3, %r5
+; br %r14

diff --git a/cranelift/filetests/filetests/isa/s390x/call.clif b/cranelift/filetests/filetests/isa/s390x/call.clif
index 4fee8cf9f8..bd3648316b 100644
--- a/cranelift/filetests/filetests/isa/s390x/call.clif
+++ b/cranelift/filetests/filetests/isa/s390x/call.clif
@@ -1,4 +1,4 @@
-test compile
+test compile precise-output
 target s390x

 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
@@ -13,13 +13,14 @@ block0(v0: i64):
 return v1
 }

-; check: stmg %r14, %r15, 112(%r15)
-; nextln: aghi %r15, -160
-; nextln: virtual_sp_offset_adjust 160
-; nextln: bras %r1, 12 ; data %g + 0 ; lg %r3, 0(%r1)
-; nextln: basr %r14, %r3
-; nextln: lmg %r14, %r15, 272(%r15)
-; nextln: br %r14
+; stmg %r14, %r15, 112(%r15)
+; aghi %r15, -160
+; virtual_sp_offset_adjust 160
+; block0:
+; bras %r1, 12 ; data %g + 0 ; lg %r3, 0(%r1)
+; basr %r14, %r3
+; lmg %r14, %r15, 272(%r15)
+; br %r14

 function %call_uext(i32) -> i64 {
 fn0 = %g(i32 uext) -> i64
@@ -29,22 +30,24 @@ block0(v0: i32):
 return v1
 }

-; check: stmg %r14, %r15, 112(%r15)
-; nextln: aghi %r15, -160
-; nextln: virtual_sp_offset_adjust 160
-; nextln: llgfr %r2, %r2
-; nextln: bras %r1, 12 ; data %g + 0 ; lg %r3, 0(%r1)
-; nextln: basr %r14, %r3
-; nextln: lmg %r14, %r15, 272(%r15)
-; nextln: br %r14
+; stmg %r14, %r15, 112(%r15)
+; aghi %r15, -160
+; virtual_sp_offset_adjust 160
+; block0:
+; llgfr %r2, %r2
+; bras %r1, 12 ; data %g + 0 ; lg %r3, 0(%r1)
+; basr %r14, %r3
+; lmg %r14, %r15, 272(%r15)
+; br %r14

 function %ret_uext(i32) -> i32 uext {
 block0(v0: i32):
 return v0
 }

-; check: llgfr %r2, %r2
-; nextln: br %r14
+; block0:
+; llgfr %r2, %r2
+; br %r14

 function %call_uext(i32) -> i64 {
 fn0 = %g(i32 sext) -> i64
@@ -54,22 +57,24 @@ block0(v0: i32):
 return v1
 }

-; check: stmg %r14, %r15, 112(%r15)
-; nextln: aghi %r15, -160
-; nextln: virtual_sp_offset_adjust 160
-; nextln: lgfr %r2, %r2
-; nextln: bras %r1, 12 ; data %g + 0 ; lg %r3, 0(%r1)
-; nextln: basr %r14, %r3
-; nextln: lmg %r14, %r15, 272(%r15)
-; nextln: br %r14
+; stmg %r14, %r15, 112(%r15)
+; aghi %r15, -160
+; virtual_sp_offset_adjust 160
+; block0:
+; lgfr %r2, %r2
+; bras %r1, 12 ; data %g + 0 ; lg %r3, 0(%r1)
+; basr %r14, %r3
+; lmg %r14, %r15, 272(%r15)
+; br %r14

 function %ret_uext(i32) -> i32 sext {
 block0(v0: i32):
 return v0
 }

-; check: lgfr %r2, %r2
-; nextln: br %r14
+; block0:
+; lgfr %r2, %r2
+; br %r14

 function %call_colocated(i64) -> i64 {
 fn0 = colocated %g(i64) -> i64
@@ -79,12 +84,13 @@ block0(v0: i64):
 return v1
 }

-; check: stmg %r14, %r15, 112(%r15)
-; nextln: aghi %r15, -160
-; nextln: virtual_sp_offset_adjust 160
-; nextln: brasl %r14, %g
-; nextln: lmg %r14, %r15, 272(%r15)
-; nextln: br %r14
+; stmg %r14, %r15, 112(%r15)
+; aghi %r15, -160
+; virtual_sp_offset_adjust 160
+; block0:
+; brasl %r14, %g
+; lmg %r14, %r15, 272(%r15)
+; br %r14

 function %f2(i32) -> i64 {
 fn0 = %g(i32 uext) -> i64
@@ -94,9 +100,15 @@ block0(v0: i32):
 return v1
 }

-;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-;; CALL_INDIRECT
-;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+; stmg %r14, %r15, 112(%r15)
+; aghi %r15, -160
+; virtual_sp_offset_adjust 160
+; block0:
+; llgfr %r2, %r2
+; bras %r1, 12 ; data %g + 0 ; lg %r3, 0(%r1)
+; basr %r14, %r3
+; lmg %r14, %r15, 272(%r15)
+; br %r14

 function %call_indirect(i64, i64) -> i64 {
 sig0 = (i64) -> i64
@@ -105,9 +117,11 @@ block0(v0: i64, v1: i64):
 return v2
 }

-; check: stmg %r14, %r15, 112(%r15)
-; nextln: aghi %r15, -160
-; nextln: virtual_sp_offset_adjust 160
-; nextln: basr %r14, %r3
-; nextln: lmg %r14, %r15, 272(%r15)
-; nextln: br %r14
+; stmg %r14, %r15, 112(%r15)
+; aghi %r15, -160
+; virtual_sp_offset_adjust 160
+; block0:
+; basr %r14, %r3
+; lmg %r14, %r15, 272(%r15)
+; br %r14
+
diff --git a/cranelift/filetests/filetests/isa/s390x/condbr.clif b/cranelift/filetests/filetests/isa/s390x/condbr.clif
index 12b81b705c..9aa2bf4197 100644
--- a/cranelift/filetests/filetests/isa/s390x/condbr.clif
+++ b/cranelift/filetests/filetests/isa/s390x/condbr.clif
@@ -1,4 +1,4 @@
-test compile
+test compile precise-output
 target s390x

 function %f(i64, i64) -> b1 {
@@ -7,10 +7,11 @@ block0(v0: i64, v1: i64):
 return v2
 }

-; check: clgr %r2, %r3
-; nextln: lhi %r2, 0
-; nextln: lochie %r2, 1
-; nextln: br %r14
+; block0:
+; clgr %r2, %r3
+; lhi %r2, 0
+; lochie %r2, 1
+; br %r14

 function %f(i64, i64) -> i64 {
 block0(v0: i64, v1: i64):
@@ -27,15 +28,15 @@ block2:
 return v5
 }

-; check: Block 0:
-; check: clgr %r2, %r3
-; nextln: jge label1 ; jg label2
-; check: Block 1:
-; check: lghi %r2, 1
-; nextln: br %r14
-; check: Block 2:
-; check: lghi %r2, 2
-; nextln: br %r14
+; block0:
+; clgr %r2, %r3
+; jge label1 ; jg label2
+; block1:
+; lghi %r2, 1
+; br %r14
+; block2:
+; lghi %r2, 2
+; br %r14

 function %f(i64, i64) -> i64 {
 block0(v0: i64, v1: i64):
@@ -48,15 +49,14 @@ block1:
 return v4
 }

-; FIXME: Should optimize away branches
+; block0:
+; clgr %r2, %r3
+; jge label1 ; jg label2
+; block1:
+; jg label3
+; block2:
+; jg label3
+; block3:
+; lghi %r2, 1
+; br %r14

-; check: Block 0:
-; check: clgr %r2, %r3
-; nextln: jge label1 ; jg label2
-; check: Block 1:
-; check: jg label3
-; check: Block 2:
-; check: jg label3
-; check: Block 3:
-; check: lghi %r2, 1
-; nextln: br %r14
diff --git a/cranelift/filetests/filetests/isa/s390x/condops.clif b/cranelift/filetests/filetests/isa/s390x/condops.clif
index aaf1c3ba54..5a73e0ae1e 100644
--- a/cranelift/filetests/filetests/isa/s390x/condops.clif
+++ b/cranelift/filetests/filetests/isa/s390x/condops.clif
@@ -1,4 +1,4 @@
-test compile
+test compile precise-output
 target s390x

 function %f(i8, i64, i64) -> i64 {
@@ -9,11 +9,12 @@ block0(v0: i8, v1: i64, v2: i64):
 return v5
 }

-; check: llcr %r2, %r2
-; nextln: clfi %r2, 42
-; nextln: locgre %r4, %r3
-; nextln: lgr %r2, %r4
-; nextln: br %r14
+; block0:
+; llcr %r5, %r2
+; clfi %r5, 42
+; lgr %r2, %r4
+; locgre %r2, %r3
+; br %r14

 function %g(b1, i8, i8) -> i8 {
 block0(v0: b1, v1: i8, v2: i8):
@@ -21,13 +22,12 @@ block0(v0: b1, v1: i8, v2: i8):
 return v3
 }

-; FIXME: optimize i8/i16 compares
-
-; check: llcr %r2, %r2
-; nextln: chi %r2, 0
-; nextln: locrlh %r4, %r3
-; nextln: lr %r2, %r4
-; nextln: br %r14
+; block0:
+; llcr %r5, %r2
+; chi %r5, 0
+; lgr %r2, %r4
+; locrlh %r2, %r3
+; br %r14

 function %i(i32, i8, i8) -> i8 {
 block0(v0: i32, v1: i8, v2: i8):
@@ -37,7 +37,9 @@ block0(v0: i32, v1: i8, v2: i8):
 return v5
 }

-; check: clfi %r2, 42
-; nextln: locre %r4, %r3
-; nextln: lr %r2, %r4
-; nextln: br %r14
+; block0:
+; clfi %r2, 42
+; lgr %r2, %r4
+; locre %r2, %r3
+; br %r14
+
diff --git a/cranelift/filetests/filetests/isa/s390x/constants.clif b/cranelift/filetests/filetests/isa/s390x/constants.clif
index 96effdecde..9a9025873b 100644
--- a/cranelift/filetests/filetests/isa/s390x/constants.clif
+++ b/cranelift/filetests/filetests/isa/s390x/constants.clif
@@ -1,4 +1,4 @@
-test compile
+test compile precise-output
 target s390x

 function %f() -> b8 {
@@ -7,8 +7,9 @@ block0:
 return v0
 }

-; check: lhi %r2, 255
-; nextln: br %r14
+; block0:
+; lhi %r2, 255
+; br %r14

 function %f() -> b16 {
 block0:
@@ -16,8 +17,9 @@ block0:
 return v0
 }

-; check: lhi %r2, 0
-; nextln: br %r14
+; block0:
+; lhi %r2, 0
+; br %r14

 function %f() -> i64 {
 block0:
@@ -25,8 +27,9 @@ block0:
 return v0
 }

-; check: lghi %r2, 0
-; nextln: br %r14
+; block0:
+; lghi %r2, 0
+; br %r14

 function %f() -> i64 {
 block0:
@@ -34,8 +37,9 @@ block0:
 return v0
 }

-; check: lgfi %r2, 65535
-; nextln: br %r14
+; block0:
+; lgfi %r2, 65535
+; br %r14

 function %f() -> i64 {
 block0:
@@ -43,8 +47,9 @@ block0:
 return v0
 }

-; check: llilh %r2, 65535
-; nextln: br %r14
+; block0:
+; llilh %r2, 65535
+; br %r14

 function %f() -> i64 {
 block0:
@@ -52,8 +57,9 @@ block0:
 return v0
 }

-; check: llihl %r2, 65535
-; nextln: br %r14
+; block0:
+; llihl %r2, 65535
+; br %r14

 function %f() -> i64 {
 block0:
@@ -61,8 +67,9 @@ block0:
 return v0
 }

-; check: llihh %r2, 65535
-; nextln: br %r14
+; block0:
+; llihh %r2, 65535
+; br %r14

 function
%f() -> i64 { block0: @@ -70,8 +77,9 @@ block0: return v0 } -; check: lghi %r2, -1 -; nextln: br %r14 +; block0: +; lghi %r2, -1 +; br %r14 function %f() -> i64 { block0: @@ -79,28 +87,31 @@ block0: return v0 } -; check: lgfi %r2, -65536 -; nextln: br %r14 +; block0: +; lgfi %r2, -65536 +; br %r14 function %f() -> i64 { block0: - v0 = iconst.i64 0xf34bf0a31212003a ; random digits + v0 = iconst.i64 0xf34bf0a31212003a ;; random digits return v0 } -; check: llihf %r2, 4081840291 -; nextln: iilf %r2, 303169594 -; nextln: br %r14 +; block0: +; llihf %r2, 4081840291 +; iilf %r2, 303169594 +; br %r14 function %f() -> i64 { block0: - v0 = iconst.i64 0x12e900001ef40000 ; random digits with 2 clear half words + v0 = iconst.i64 0x12e900001ef40000 ;; random digits with 2 clear half words return v0 } -; check: llihh %r2, 4841 -; nextln: iilh %r2, 7924 -; nextln: br %r14 +; block0: +; llihh %r2, 4841 +; iilh %r2, 7924 +; br %r14 function %f() -> i32 { block0: @@ -108,6 +119,7 @@ block0: return v0 } -; check: lhi %r2, -1 -; nextln: br %r14 +; block0: +; lhi %r2, -1 +; br %r14 diff --git a/cranelift/filetests/filetests/isa/s390x/conversions.clif b/cranelift/filetests/filetests/isa/s390x/conversions.clif index 5d57c8881a..aaf7c48a57 100644 --- a/cranelift/filetests/filetests/isa/s390x/conversions.clif +++ b/cranelift/filetests/filetests/isa/s390x/conversions.clif @@ -1,4 +1,4 @@ -test compile +test compile precise-output target s390x ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; @@ -11,8 +11,9 @@ block0(v0: i32): return v1 } -; check: llgfr %r2, %r2 -; nextln: br %r14 +; block0: +; llgfr %r2, %r2 +; br %r14 function %uextend_i16_i64(i16) -> i64 { block0(v0: i16): @@ -20,8 +21,9 @@ block0(v0: i16): return v1 } -; check: llghr %r2, %r2 -; nextln: br %r14 +; block0: +; llghr %r2, %r2 +; br %r14 function %uextend_i16_i32(i16) -> i32 { block0(v0: i16): @@ -29,8 +31,9 @@ block0(v0: i16): return v1 } -; check: llhr %r2, %r2 -; nextln: br %r14 +; block0: +; llhr %r2, %r2 +; br %r14 function %uextend_i8_i64(i8) -> i64 { block0(v0: i8): @@ -38,8 +41,9 @@ block0(v0: i8): return v1 } -; check: llgcr %r2, %r2 -; nextln: br %r14 +; block0: +; llgcr %r2, %r2 +; br %r14 function %uextend_i8_i32(i8) -> i32 { block0(v0: i8): @@ -47,8 +51,9 @@ block0(v0: i8): return v1 } -; check: llcr %r2, %r2 -; nextln: br %r14 +; block0: +; llcr %r2, %r2 +; br %r14 function %uextend_i8_i16(i8) -> i16 { block0(v0: i8): @@ -56,13 +61,9 @@ block0(v0: i8): return v1 } -; check: llcr %r2, %r2 -; nextln: br %r14 - - -;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; -;; SEXTEND -;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +; block0: +; llcr %r2, %r2 +; br %r14 function %sextend_i32_i64(i32) -> i64 { block0(v0: i32): @@ -70,8 +71,9 @@ block0(v0: i32): return v1 } -; check: lgfr %r2, %r2 -; nextln: br %r14 +; block0: +; lgfr %r2, %r2 +; br %r14 function %sextend_i16_i64(i16) -> i64 { block0(v0: i16): @@ -79,8 +81,9 @@ block0(v0: i16): return v1 } -; check: lghr %r2, %r2 -; nextln: br %r14 +; block0: +; lghr %r2, %r2 +; br %r14 function %sextend_i16_i32(i16) -> i32 { block0(v0: i16): @@ -88,8 +91,9 @@ block0(v0: i16): return v1 } -; check: lhr %r2, %r2 -; nextln: br %r14 +; block0: +; lhr %r2, %r2 +; br %r14 function %sextend_i8_i64(i8) -> i64 { block0(v0: i8): @@ -97,8 +101,9 @@ block0(v0: i8): return v1 } -; check: lgbr %r2, %r2 -; nextln: br %r14 +; block0: +; lgbr %r2, %r2 +; br %r14 function %sextend_i8_i32(i8) -> i32 { block0(v0: i8): @@ -106,8 +111,9 @@ block0(v0: i8): return v1 } -; check: lbr %r2, %r2 -; nextln: br %r14 +; block0: +; lbr %r2, %r2 
+; br %r14 function %sextend_i8_i16(i8) -> i16 { block0(v0: i8): @@ -115,13 +121,9 @@ block0(v0: i8): return v1 } -; check: lbr %r2, %r2 -; nextln: br %r14 - - -;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; -;; IREDUCE -;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +; block0: +; lbr %r2, %r2 +; br %r14 function %ireduce_i64_i32(i64, i64) -> i32 { block0(v0: i64, v1: i64): @@ -129,8 +131,9 @@ block0(v0: i64, v1: i64): return v2 } -; check: lr %r2, %r3 -; nextln: br %r14 +; block0: +; lgr %r2, %r3 +; br %r14 function %ireduce_i64_i16(i64, i64) -> i16 { block0(v0: i64, v1: i64): @@ -138,8 +141,9 @@ block0(v0: i64, v1: i64): return v2 } -; check: lr %r2, %r3 -; nextln: br %r14 +; block0: +; lgr %r2, %r3 +; br %r14 function %ireduce_i64_i8(i64, i64) -> i8 { block0(v0: i64, v1: i64): @@ -147,8 +151,9 @@ block0(v0: i64, v1: i64): return v2 } -; check: lr %r2, %r3 -; nextln: br %r14 +; block0: +; lgr %r2, %r3 +; br %r14 function %ireduce_i32_i16(i32, i32) -> i16 { block0(v0: i32, v1: i32): @@ -156,8 +161,9 @@ block0(v0: i32, v1: i32): return v2 } -; check: lr %r2, %r3 -; nextln: br %r14 +; block0: +; lgr %r2, %r3 +; br %r14 function %ireduce_i32_i8(i32, i32) -> i8 { block0(v0: i32, v1: i32): @@ -165,8 +171,9 @@ block0(v0: i32, v1: i32): return v2 } -; check: lr %r2, %r3 -; nextln: br %r14 +; block0: +; lgr %r2, %r3 +; br %r14 function %ireduce_i16_i8(i16, i16) -> i8 { block0(v0: i16, v1: i16): @@ -174,12 +181,9 @@ block0(v0: i16, v1: i16): return v2 } -; check: lr %r2, %r3 -; nextln: br %r14 - -;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; -;; BEXTEND -;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +; block0: +; lgr %r2, %r3 +; br %r14 function %bextend_b32_b64(b32) -> b64 { block0(v0: b32): @@ -187,8 +191,9 @@ block0(v0: b32): return v1 } -; check: lgfr %r2, %r2 -; nextln: br %r14 +; block0: +; lgfr %r2, %r2 +; br %r14 function %bextend_b16_b64(b16) -> b64 { block0(v0: b16): @@ -196,8 +201,9 @@ block0(v0: b16): return v1 } -; check: lghr %r2, %r2 -; nextln: br %r14 +; block0: +; lghr %r2, %r2 +; br %r14 function %bextend_b16_b32(b16) -> b32 { block0(v0: b16): @@ -205,8 +211,9 @@ block0(v0: b16): return v1 } -; check: lhr %r2, %r2 -; nextln: br %r14 +; block0: +; lhr %r2, %r2 +; br %r14 function %bextend_b8_b64(b8) -> b64 { block0(v0: b8): @@ -214,8 +221,9 @@ block0(v0: b8): return v1 } -; check: lgbr %r2, %r2 -; nextln: br %r14 +; block0: +; lgbr %r2, %r2 +; br %r14 function %bextend_b8_b32(b8) -> b32 { block0(v0: b8): @@ -223,8 +231,9 @@ block0(v0: b8): return v1 } -; check: lbr %r2, %r2 -; nextln: br %r14 +; block0: +; lbr %r2, %r2 +; br %r14 function %bextend_b8_b16(b8) -> b16 { block0(v0: b8): @@ -232,8 +241,9 @@ block0(v0: b8): return v1 } -; check: lbr %r2, %r2 -; nextln: br %r14 +; block0: +; lbr %r2, %r2 +; br %r14 function %bextend_b1_b64(b1) -> b64 { block0(v0: b1): @@ -241,9 +251,10 @@ block0(v0: b1): return v1 } -; check: sllg %r2, %r2, 63 -; nextln: srag %r2, %r2, 63 -; nextln: br %r14 +; block0: +; sllg %r5, %r2, 63 +; srag %r2, %r5, 63 +; br %r14 function %bextend_b1_b32(b1) -> b32 { block0(v0: b1): @@ -251,9 +262,10 @@ block0(v0: b1): return v1 } -; check: sllk %r2, %r2, 31 -; nextln: srak %r2, %r2, 31 -; nextln: br %r14 +; block0: +; sllk %r5, %r2, 31 +; srak %r2, %r5, 31 +; br %r14 function %bextend_b1_b16(b1) -> b16 { block0(v0: b1): @@ -261,9 +273,10 @@ block0(v0: b1): return v1 } -; check: sllk %r2, %r2, 31 -; nextln: srak %r2, %r2, 31 -; nextln: br %r14 +; block0: +; sllk %r5, %r2, 31 +; srak %r2, %r5, 31 +; br %r14 function %bextend_b1_b8(b1) -> b8 { block0(v0: b1): @@ -271,13 
+284,10 @@ block0(v0: b1): return v1 } -; check: sllk %r2, %r2, 31 -; nextln: srak %r2, %r2, 31 -; nextln: br %r14 - -;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; -;; BREDUCE -;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +; block0: +; sllk %r5, %r2, 31 +; srak %r2, %r5, 31 +; br %r14 function %breduce_b64_b32(b64, b64) -> b32 { block0(v0: b64, v1: b64): @@ -285,8 +295,9 @@ block0(v0: b64, v1: b64): return v2 } -; check: lr %r2, %r3 -; nextln: br %r14 +; block0: +; lgr %r2, %r3 +; br %r14 function %breduce_b64_b16(b64, b64) -> b16 { block0(v0: b64, v1: b64): @@ -294,8 +305,9 @@ block0(v0: b64, v1: b64): return v2 } -; check: lr %r2, %r3 -; nextln: br %r14 +; block0: +; lgr %r2, %r3 +; br %r14 function %breduce_b64_b8(b64, b64) -> b8 { block0(v0: b64, v1: b64): @@ -303,8 +315,9 @@ block0(v0: b64, v1: b64): return v2 } -; check: lr %r2, %r3 -; nextln: br %r14 +; block0: +; lgr %r2, %r3 +; br %r14 function %breduce_b64_b1(b64, b64) -> b1 { block0(v0: b64, v1: b64): @@ -312,8 +325,9 @@ block0(v0: b64, v1: b64): return v2 } -; check: lr %r2, %r3 -; nextln: br %r14 +; block0: +; lgr %r2, %r3 +; br %r14 function %breduce_b32_b16(b32, b32) -> b16 { block0(v0: b32, v1: b32): @@ -321,8 +335,9 @@ block0(v0: b32, v1: b32): return v2 } -; check: lr %r2, %r3 -; nextln: br %r14 +; block0: +; lgr %r2, %r3 +; br %r14 function %breduce_b32_b8(b32, b32) -> b8 { block0(v0: b32, v1: b32): @@ -330,8 +345,9 @@ block0(v0: b32, v1: b32): return v2 } -; check: lr %r2, %r3 -; nextln: br %r14 +; block0: +; lgr %r2, %r3 +; br %r14 function %breduce_b32_b1(b32, b32) -> b1 { block0(v0: b32, v1: b32): @@ -339,8 +355,9 @@ block0(v0: b32, v1: b32): return v2 } -; check: lr %r2, %r3 -; nextln: br %r14 +; block0: +; lgr %r2, %r3 +; br %r14 function %breduce_b16_b8(b16, b16) -> b8 { block0(v0: b16, v1: b16): @@ -348,8 +365,9 @@ block0(v0: b16, v1: b16): return v2 } -; check: lr %r2, %r3 -; nextln: br %r14 +; block0: +; lgr %r2, %r3 +; br %r14 function %breduce_b16_b1(b16, b16) -> b1 { block0(v0: b16, v1: b16): @@ -357,8 +375,9 @@ block0(v0: b16, v1: b16): return v2 } -; check: lr %r2, %r3 -; nextln: br %r14 +; block0: +; lgr %r2, %r3 +; br %r14 function %breduce_b8_b1(b8, b8) -> b1 { block0(v0: b8, v1: b8): @@ -366,12 +385,9 @@ block0(v0: b8, v1: b8): return v2 } -; check: lr %r2, %r3 -; nextln: br %r14 - -;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; -;; BMASK -;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +; block0: +; lgr %r2, %r3 +; br %r14 function %bmask_b64_i64(b64, b64) -> i64 { block0(v0: b64, v1: b64): @@ -379,8 +395,9 @@ block0(v0: b64, v1: b64): return v2 } -; check: lgr %r2, %r3 -; nextln: br %r14 +; block0: +; lgr %r2, %r3 +; br %r14 function %bmask_b64_i32(b64, b64) -> i32 { block0(v0: b64, v1: b64): @@ -388,8 +405,9 @@ block0(v0: b64, v1: b64): return v2 } -; check: lr %r2, %r3 -; nextln: br %r14 +; block0: +; lgr %r2, %r3 +; br %r14 function %bmask_b64_i16(b64, b64) -> i16 { block0(v0: b64, v1: b64): @@ -397,8 +415,9 @@ block0(v0: b64, v1: b64): return v2 } -; check: lr %r2, %r3 -; nextln: br %r14 +; block0: +; lgr %r2, %r3 +; br %r14 function %bmask_b64_i8(b64, b64) -> i8 { block0(v0: b64, v1: b64): @@ -406,8 +425,9 @@ block0(v0: b64, v1: b64): return v2 } -; check: lr %r2, %r3 -; nextln: br %r14 +; block0: +; lgr %r2, %r3 +; br %r14 function %bmask_b32_i64(b32, b32) -> i64 { block0(v0: b32, v1: b32): @@ -415,8 +435,9 @@ block0(v0: b32, v1: b32): return v2 } -; check: lgfr %r2, %r3 -; nextln: br %r14 +; block0: +; lgfr %r2, %r3 +; br %r14 function %bmask_b32_i32(b32, b32) -> i32 { block0(v0: b32, v1: b32): @@ 
-424,8 +445,9 @@ block0(v0: b32, v1: b32): return v2 } -; check: lr %r2, %r3 -; nextln: br %r14 +; block0: +; lgr %r2, %r3 +; br %r14 function %bmask_b32_i16(b32, b32) -> i16 { block0(v0: b32, v1: b32): @@ -433,8 +455,9 @@ block0(v0: b32, v1: b32): return v2 } -; check: lr %r2, %r3 -; nextln: br %r14 +; block0: +; lgr %r2, %r3 +; br %r14 function %bmask_b32_i8(b32, b32) -> i8 { block0(v0: b32, v1: b32): @@ -442,8 +465,9 @@ block0(v0: b32, v1: b32): return v2 } -; check: lr %r2, %r3 -; nextln: br %r14 +; block0: +; lgr %r2, %r3 +; br %r14 function %bmask_b16_i64(b16, b16) -> i64 { block0(v0: b16, v1: b16): @@ -451,8 +475,9 @@ block0(v0: b16, v1: b16): return v2 } -; check: lghr %r2, %r3 -; nextln: br %r14 +; block0: +; lghr %r2, %r3 +; br %r14 function %bmask_b16_i32(b16, b16) -> i32 { block0(v0: b16, v1: b16): @@ -460,8 +485,9 @@ block0(v0: b16, v1: b16): return v2 } -; check: lhr %r2, %r3 -; nextln: br %r14 +; block0: +; lhr %r2, %r3 +; br %r14 function %bmask_b16_i16(b16, b16) -> i16 { block0(v0: b16, v1: b16): @@ -469,8 +495,9 @@ block0(v0: b16, v1: b16): return v2 } -; check: lr %r2, %r3 -; nextln: br %r14 +; block0: +; lgr %r2, %r3 +; br %r14 function %bmask_b16_i8(b16, b16) -> i8 { block0(v0: b16, v1: b16): @@ -478,8 +505,9 @@ block0(v0: b16, v1: b16): return v2 } -; check: lr %r2, %r3 -; nextln: br %r14 +; block0: +; lgr %r2, %r3 +; br %r14 function %bmask_b8_i64(b8, b8) -> i64 { block0(v0: b8, v1: b8): @@ -487,8 +515,9 @@ block0(v0: b8, v1: b8): return v2 } -; check: lgbr %r2, %r3 -; nextln: br %r14 +; block0: +; lgbr %r2, %r3 +; br %r14 function %bmask_b8_i32(b8, b8) -> i32 { block0(v0: b8, v1: b8): @@ -496,8 +525,9 @@ block0(v0: b8, v1: b8): return v2 } -; check: lbr %r2, %r3 -; nextln: br %r14 +; block0: +; lbr %r2, %r3 +; br %r14 function %bmask_b8_i16(b8, b8) -> i16 { block0(v0: b8, v1: b8): @@ -505,8 +535,9 @@ block0(v0: b8, v1: b8): return v2 } -; check: lbr %r2, %r3 -; nextln: br %r14 +; block0: +; lbr %r2, %r3 +; br %r14 function %bmask_b8_i8(b8, b8) -> i8 { block0(v0: b8, v1: b8): @@ -514,8 +545,9 @@ block0(v0: b8, v1: b8): return v2 } -; check: lr %r2, %r3 -; nextln: br %r14 +; block0: +; lgr %r2, %r3 +; br %r14 function %bmask_b1_i64(b1, b1) -> i64 { block0(v0: b1, v1: b1): @@ -523,9 +555,10 @@ block0(v0: b1, v1: b1): return v2 } -; check: sllg %r2, %r3, 63 -; nextln: srag %r2, %r2, 63 -; nextln: br %r14 +; block0: +; sllg %r3, %r3, 63 +; srag %r2, %r3, 63 +; br %r14 function %bmask_b1_i32(b1, b1) -> i32 { block0(v0: b1, v1: b1): @@ -533,9 +566,10 @@ block0(v0: b1, v1: b1): return v2 } -; check: sllk %r2, %r3, 31 -; nextln: srak %r2, %r2, 31 -; nextln: br %r14 +; block0: +; sllk %r3, %r3, 31 +; srak %r2, %r3, 31 +; br %r14 function %bmask_b1_i16(b1, b1) -> i16 { block0(v0: b1, v1: b1): @@ -543,9 +577,10 @@ block0(v0: b1, v1: b1): return v2 } -; check: sllk %r2, %r3, 31 -; nextln: srak %r2, %r2, 31 -; nextln: br %r14 +; block0: +; sllk %r3, %r3, 31 +; srak %r2, %r3, 31 +; br %r14 function %bmask_b1_i8(b1, b1) -> i8 { block0(v0: b1, v1: b1): @@ -553,13 +588,10 @@ block0(v0: b1, v1: b1): return v2 } -; check: sllk %r2, %r3, 31 -; nextln: srak %r2, %r2, 31 -; nextln: br %r14 - -;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; -;; BINT -;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +; block0: +; sllk %r3, %r3, 31 +; srak %r2, %r3, 31 +; br %r14 function %bint_b64_i64(b64) -> i64 { block0(v0: b64): @@ -567,9 +599,10 @@ block0(v0: b64): return v1 } -; check: lghi %r3, 1 -; nextln: ngr %r2, %r3 -; nextln: br %r14 +; block0: +; lghi %r5, 1 +; ngr %r2, %r5 +; br %r14 function 
%bint_b64_i32(b64) -> i32 { block0(v0: b64): @@ -577,8 +610,9 @@ block0(v0: b64): return v1 } -; check: nilf %r2, 1 -; nextln: br %r14 +; block0: +; nilf %r2, 1 +; br %r14 function %bint_b64_i16(b64) -> i16 { block0(v0: b64): @@ -586,8 +620,9 @@ block0(v0: b64): return v1 } -; check: nill %r2, 1 -; nextln: br %r14 +; block0: +; nill %r2, 1 +; br %r14 function %bint_b64_i8(b64) -> i8 { block0(v0: b64): @@ -595,8 +630,9 @@ block0(v0: b64): return v1 } -; check: nill %r2, 1 -; nextln: br %r14 +; block0: +; nill %r2, 1 +; br %r14 function %bint_b32_i64(b32) -> i64 { block0(v0: b32): @@ -604,9 +640,10 @@ block0(v0: b32): return v1 } -; check: lghi %r3, 1 -; nextln: ngr %r2, %r3 -; nextln: br %r14 +; block0: +; lghi %r5, 1 +; ngr %r2, %r5 +; br %r14 function %bint_b32_i32(b32) -> i32 { block0(v0: b32): @@ -614,8 +651,9 @@ block0(v0: b32): return v1 } -; check: nilf %r2, 1 -; nextln: br %r14 +; block0: +; nilf %r2, 1 +; br %r14 function %bint_b32_i16(b32) -> i16 { block0(v0: b32): @@ -623,8 +661,9 @@ block0(v0: b32): return v1 } -; check: nill %r2, 1 -; nextln: br %r14 +; block0: +; nill %r2, 1 +; br %r14 function %bint_b32_i8(b32) -> i8 { block0(v0: b32): @@ -632,8 +671,9 @@ block0(v0: b32): return v1 } -; check: nill %r2, 1 -; nextln: br %r14 +; block0: +; nill %r2, 1 +; br %r14 function %bint_b16_i64(b16) -> i64 { block0(v0: b16): @@ -641,9 +681,10 @@ block0(v0: b16): return v1 } -; check: lghi %r3, 1 -; nextln: ngr %r2, %r3 -; nextln: br %r14 +; block0: +; lghi %r5, 1 +; ngr %r2, %r5 +; br %r14 function %bint_b16_i32(b16) -> i32 { block0(v0: b16): @@ -651,8 +692,9 @@ block0(v0: b16): return v1 } -; check: nilf %r2, 1 -; nextln: br %r14 +; block0: +; nilf %r2, 1 +; br %r14 function %bint_b16_i16(b16) -> i16 { block0(v0: b16): @@ -660,8 +702,9 @@ block0(v0: b16): return v1 } -; check: nill %r2, 1 -; nextln: br %r14 +; block0: +; nill %r2, 1 +; br %r14 function %bint_b16_i8(b16) -> i8 { block0(v0: b16): @@ -669,8 +712,9 @@ block0(v0: b16): return v1 } -; check: nill %r2, 1 -; nextln: br %r14 +; block0: +; nill %r2, 1 +; br %r14 function %bint_b8_i64(b8) -> i64 { block0(v0: b8): @@ -678,9 +722,10 @@ block0(v0: b8): return v1 } -; check: lghi %r3, 1 -; nextln: ngr %r2, %r3 -; nextln: br %r14 +; block0: +; lghi %r5, 1 +; ngr %r2, %r5 +; br %r14 function %bint_b8_i32(b8) -> i32 { block0(v0: b8): @@ -688,8 +733,9 @@ block0(v0: b8): return v1 } -; check: nilf %r2, 1 -; nextln: br %r14 +; block0: +; nilf %r2, 1 +; br %r14 function %bint_b8_i16(b8) -> i16 { block0(v0: b8): @@ -697,8 +743,9 @@ block0(v0: b8): return v1 } -; check: nill %r2, 1 -; nextln: br %r14 +; block0: +; nill %r2, 1 +; br %r14 function %bint_b8_i8(b8) -> i8 { block0(v0: b8): @@ -706,8 +753,9 @@ block0(v0: b8): return v1 } -; check: nill %r2, 1 -; nextln: br %r14 +; block0: +; nill %r2, 1 +; br %r14 function %bint_b1_i64(b1) -> i64 { block0(v0: b1): @@ -715,9 +763,10 @@ block0(v0: b1): return v1 } -; check: lghi %r3, 1 -; nextln: ngr %r2, %r3 -; nextln: br %r14 +; block0: +; lghi %r5, 1 +; ngr %r2, %r5 +; br %r14 function %bint_b1_i32(b1) -> i32 { block0(v0: b1): @@ -725,8 +774,9 @@ block0(v0: b1): return v1 } -; check: nilf %r2, 1 -; nextln: br %r14 +; block0: +; nilf %r2, 1 +; br %r14 function %bint_b1_i16(b1) -> i16 { block0(v0: b1): @@ -734,8 +784,9 @@ block0(v0: b1): return v1 } -; check: nill %r2, 1 -; nextln: br %r14 +; block0: +; nill %r2, 1 +; br %r14 function %bint_b1_i8(b1) -> i8 { block0(v0: b1): @@ -743,6 +794,7 @@ block0(v0: b1): return v1 } -; check: nill %r2, 1 -; nextln: br %r14 +; block0: +; nill %r2, 1 +; br %r14 
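The s390x filetest hunks in this patch all follow one pattern: the header changes from `test compile` to `test compile precise-output`, the hand-written `check:`/`nextln:` directives are deleted, and the exact VCode disassembly the compiler now produces is recorded as `;`-prefixed comments after each function, grouped under `block0:`, `block1:`, ... labels. A minimal sketch of the new format, reusing the `uextend` case from the conversions.clif hunk above — the instruction sequence is taken from this patch itself, and the exact register choices are simply whatever regalloc2 assigns, so they may change with future allocator updates:

test compile precise-output
target s390x

function %uextend_i32_i64(i32) -> i64 {
block0(v0: i32):
    v1 = uextend.i64 v0
    return v1
}

; block0:
; llgfr %r2, %r2
; br %r14

Because precise output is compared exactly rather than pattern-matched, incidental codegen differences under regalloc2 show up throughout as test updates: `lgr` where the old allocator produced `lr`, fresh temporaries such as `%r5` in place of reused argument registers, and three-operand forms like `xgrk`/`ngrk` when the destination no longer coincides with a source.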
diff --git a/cranelift/filetests/filetests/isa/s390x/div-traps.clif b/cranelift/filetests/filetests/isa/s390x/div-traps.clif index 2d7428b50d..76aaba8d67 100644 --- a/cranelift/filetests/filetests/isa/s390x/div-traps.clif +++ b/cranelift/filetests/filetests/isa/s390x/div-traps.clif @@ -1,4 +1,4 @@ -test compile +test compile precise-output set avoid_div_traps=1 target s390x @@ -12,16 +12,17 @@ block0(v0: i64, v1: i64): return v2 } -; check: lgr %r1, %r2 -; nextln: cgite %r3, 0 -; nextln: llihf %r2, 2147483647 -; nextln: iilf %r2, 4294967295 -; nextln: xgr %r2, %r1 -; nextln: ngr %r2, %r3 -; nextln: cgite %r2, -1 -; nextln: dsgr %r0, %r3 -; nextln: lgr %r2, %r1 -; nextln: br %r14 +; block0: +; lgr %r1, %r2 +; cgite %r3, 0 +; llihf %r5, 2147483647 +; iilf %r5, 4294967295 +; xgrk %r4, %r5, %r1 +; ngrk %r2, %r4, %r3 +; cgite %r2, -1 +; dsgr %r0, %r3 +; lgr %r2, %r1 +; br %r14 function %sdiv_i64_imm(i64) -> i64 { block0(v0: i64): @@ -30,11 +31,12 @@ block0(v0: i64): return v2 } -; check: lgr %r1, %r2 -; nextln: lghi %r2, 2 -; nextln: dsgr %r0, %r2 -; nextln: lgr %r2, %r1 -; nextln: br %r14 +; block0: +; lgr %r1, %r2 +; lghi %r2, 2 +; dsgr %r0, %r2 +; lgr %r2, %r1 +; br %r14 function %sdiv_i32(i32, i32) -> i32 { block0(v0: i32, v1: i32): @@ -42,15 +44,16 @@ block0(v0: i32, v1: i32): return v2 } -; check: lgfr %r1, %r2 -; nextln: cite %r3, 0 -; nextln: iilf %r2, 2147483647 -; nextln: xr %r2, %r1 -; nextln: nr %r2, %r3 -; nextln: cite %r2, -1 -; nextln: dsgfr %r0, %r3 -; nextln: lr %r2, %r1 -; nextln: br %r14 +; block0: +; lgfr %r1, %r2 +; cite %r3, 0 +; iilf %r5, 2147483647 +; xrk %r4, %r5, %r1 +; nrk %r5, %r4, %r3 +; cite %r5, -1 +; dsgfr %r0, %r3 +; lgr %r2, %r1 +; br %r14 function %sdiv_i32_imm(i32) -> i32 { block0(v0: i32): @@ -59,11 +62,12 @@ block0(v0: i32): return v2 } -; check: lgfr %r1, %r2 -; nextln: lhi %r2, 2 -; nextln: dsgfr %r0, %r2 -; nextln: lr %r2, %r1 -; nextln: br %r14 +; block0: +; lgfr %r1, %r2 +; lhi %r2, 2 +; dsgfr %r0, %r2 +; lgr %r2, %r1 +; br %r14 function %sdiv_i16(i16, i16) -> i16 { block0(v0: i16, v1: i16): @@ -71,16 +75,17 @@ block0(v0: i16, v1: i16): return v2 } -; check: lghr %r1, %r2 -; nextln: lhr %r2, %r3 -; nextln: cite %r2, 0 -; nextln: lhi %r3, 32767 -; nextln: xr %r3, %r1 -; nextln: nr %r3, %r2 -; nextln: cite %r3, -1 -; nextln: dsgfr %r0, %r2 -; nextln: lr %r2, %r1 -; nextln: br %r14 +; block0: +; lghr %r1, %r2 +; lhr %r4, %r3 +; cite %r4, 0 +; lhi %r3, 32767 +; xrk %r5, %r3, %r1 +; nrk %r3, %r5, %r4 +; cite %r3, -1 +; dsgfr %r0, %r4 +; lgr %r2, %r1 +; br %r14 function %sdiv_i16_imm(i16) -> i16 { block0(v0: i16): @@ -89,11 +94,12 @@ block0(v0: i16): return v2 } -; check: lghr %r1, %r2 -; nextln: lhi %r2, 2 -; nextln: dsgfr %r0, %r2 -; nextln: lr %r2, %r1 -; nextln: br %r14 +; block0: +; lghr %r1, %r2 +; lhi %r2, 2 +; dsgfr %r0, %r2 +; lgr %r2, %r1 +; br %r14 function %sdiv_i8(i8, i8) -> i8 { block0(v0: i8, v1: i8): @@ -101,16 +107,17 @@ block0(v0: i8, v1: i8): return v2 } -; check: lgbr %r1, %r2 -; nextln: lbr %r2, %r3 -; nextln: cite %r2, 0 -; nextln: lhi %r3, 127 -; nextln: xr %r3, %r1 -; nextln: nr %r3, %r2 -; nextln: cite %r3, -1 -; nextln: dsgfr %r0, %r2 -; nextln: lr %r2, %r1 -; nextln: br %r14 +; block0: +; lgbr %r1, %r2 +; lbr %r4, %r3 +; cite %r4, 0 +; lhi %r3, 127 +; xrk %r5, %r3, %r1 +; nrk %r3, %r5, %r4 +; cite %r3, -1 +; dsgfr %r0, %r4 +; lgr %r2, %r1 +; br %r14 function %sdiv_i8_imm(i8) -> i8 { block0(v0: i8): @@ -119,15 +126,12 @@ block0(v0: i8): return v2 } -; check: lgbr %r1, %r2 -; nextln: lhi %r2, 2 -; nextln: dsgfr %r0, %r2 -; nextln: lr 
%r2, %r1 -; nextln: br %r14 - -;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; -;; UDIV -;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +; block0: +; lgbr %r1, %r2 +; lhi %r2, 2 +; dsgfr %r0, %r2 +; lgr %r2, %r1 +; br %r14 function %udiv_i64(i64, i64) -> i64 { block0(v0: i64, v1: i64): @@ -135,12 +139,13 @@ block0(v0: i64, v1: i64): return v2 } -; check: lghi %r0, 0 -; nextln: lgr %r1, %r2 -; nextln: cgite %r3, 0 -; nextln: dlgr %r0, %r3 -; nextln: lgr %r2, %r1 -; nextln: br %r14 +; block0: +; lghi %r0, 0 +; lgr %r1, %r2 +; cgite %r3, 0 +; dlgr %r0, %r3 +; lgr %r2, %r1 +; br %r14 function %udiv_i64_imm(i64) -> i64 { block0(v0: i64): @@ -149,12 +154,13 @@ block0(v0: i64): return v2 } -; check: lghi %r0, 0 -; nextln: lgr %r1, %r2 -; nextln: lghi %r2, 2 -; nextln: dlgr %r0, %r2 -; nextln: lgr %r2, %r1 -; nextln: br %r14 +; block0: +; lghi %r0, 0 +; lgr %r1, %r2 +; lghi %r3, 2 +; dlgr %r0, %r3 +; lgr %r2, %r1 +; br %r14 function %udiv_i32(i32, i32) -> i32 { block0(v0: i32, v1: i32): @@ -162,12 +168,13 @@ block0(v0: i32, v1: i32): return v2 } -; check: lhi %r0, 0 -; nextln: lr %r1, %r2 -; nextln: cite %r3, 0 -; nextln: dlr %r0, %r3 -; nextln: lr %r2, %r1 -; nextln: br %r14 +; block0: +; lhi %r0, 0 +; lgr %r1, %r2 +; cite %r3, 0 +; dlr %r0, %r3 +; lgr %r2, %r1 +; br %r14 function %udiv_i32_imm(i32) -> i32 { block0(v0: i32): @@ -176,12 +183,13 @@ block0(v0: i32): return v2 } -; check: lhi %r0, 0 -; nextln: lr %r1, %r2 -; nextln: lhi %r2, 2 -; nextln: dlr %r0, %r2 -; nextln: lr %r2, %r1 -; nextln: br %r14 +; block0: +; lhi %r0, 0 +; lgr %r1, %r2 +; lhi %r3, 2 +; dlr %r0, %r3 +; lgr %r2, %r1 +; br %r14 function %udiv_i16(i16, i16) -> i16 { block0(v0: i16, v1: i16): @@ -189,13 +197,14 @@ block0(v0: i16, v1: i16): return v2 } -; check: lhi %r0, 0 -; nextln: llhr %r1, %r2 -; nextln: llhr %r2, %r3 -; nextln: cite %r2, 0 -; nextln: dlr %r0, %r2 -; nextln: lr %r2, %r1 -; nextln: br %r14 +; block0: +; lhi %r0, 0 +; llhr %r1, %r2 +; llhr %r5, %r3 +; cite %r5, 0 +; dlr %r0, %r5 +; lgr %r2, %r1 +; br %r14 function %udiv_i16_imm(i16) -> i16 { block0(v0: i16): @@ -204,12 +213,13 @@ block0(v0: i16): return v2 } -; check: lhi %r0, 0 -; nextln: llhr %r1, %r2 -; nextln: lhi %r2, 2 -; nextln: dlr %r0, %r2 -; nextln: lr %r2, %r1 -; nextln: br %r14 +; block0: +; lhi %r0, 0 +; llhr %r1, %r2 +; lhi %r3, 2 +; dlr %r0, %r3 +; lgr %r2, %r1 +; br %r14 function %udiv_i8(i8, i8) -> i8 { block0(v0: i8, v1: i8): @@ -217,13 +227,14 @@ block0(v0: i8, v1: i8): return v2 } -; check: lhi %r0, 0 -; nextln: llcr %r1, %r2 -; nextln: llcr %r2, %r3 -; nextln: cite %r2, 0 -; nextln: dlr %r0, %r2 -; nextln: lr %r2, %r1 -; nextln: br %r14 +; block0: +; lhi %r0, 0 +; llcr %r1, %r2 +; llcr %r5, %r3 +; cite %r5, 0 +; dlr %r0, %r5 +; lgr %r2, %r1 +; br %r14 function %udiv_i8_imm(i8) -> i8 { block0(v0: i8): @@ -232,16 +243,13 @@ block0(v0: i8): return v2 } -; check: lhi %r0, 0 -; nextln: llcr %r1, %r2 -; nextln: lhi %r2, 2 -; nextln: dlr %r0, %r2 -; nextln: lr %r2, %r1 -; nextln: br %r14 - -;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; -;; SREM -;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +; block0: +; lhi %r0, 0 +; llcr %r1, %r2 +; lhi %r3, 2 +; dlr %r0, %r3 +; lgr %r2, %r1 +; br %r14 function %srem_i64(i64, i64) -> i64 { block0(v0: i64, v1: i64): @@ -249,13 +257,14 @@ block0(v0: i64, v1: i64): return v2 } -; check: lgr %r1, %r2 -; nextln: cgite %r3, 0 -; nextln: cghi %r3, -1 -; nextln: locghie %r1, 0 -; nextln: dsgr %r0, %r3 -; nextln: lgr %r2, %r0 -; nextln: br %r14 +; block0: +; lgr %r1, %r2 +; cgite %r3, 0 +; cghi %r3, -1 +; locghie %r1, 0 +; dsgr 
%r0, %r3 +; lgr %r2, %r0 +; br %r14 function %srem_i32(i32, i32) -> i32 { block0(v0: i32, v1: i32): @@ -263,11 +272,12 @@ block0(v0: i32, v1: i32): return v2 } -; check: lgfr %r1, %r2 -; nextln: cite %r3, 0 -; nextln: dsgfr %r0, %r3 -; nextln: lr %r2, %r0 -; nextln: br %r14 +; block0: +; lgfr %r1, %r2 +; cite %r3, 0 +; dsgfr %r0, %r3 +; lgr %r2, %r0 +; br %r14 function %srem_i16(i16, i16) -> i16 { block0(v0: i16, v1: i16): @@ -275,12 +285,13 @@ block0(v0: i16, v1: i16): return v2 } -; check: lghr %r1, %r2 -; nextln: lhr %r2, %r3 -; nextln: cite %r2, 0 -; nextln: dsgfr %r0, %r2 -; nextln: lr %r2, %r0 -; nextln: br %r14 +; block0: +; lghr %r1, %r2 +; lhr %r4, %r3 +; cite %r4, 0 +; dsgfr %r0, %r4 +; lgr %r2, %r0 +; br %r14 function %srem_i8(i8, i8) -> i8 { block0(v0: i8, v1: i8): @@ -288,16 +299,13 @@ block0(v0: i8, v1: i8): return v2 } -; check: lgbr %r1, %r2 -; nextln: lbr %r2, %r3 -; nextln: cite %r2, 0 -; nextln: dsgfr %r0, %r2 -; nextln: lr %r2, %r0 -; nextln: br %r14 - -;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; -;; UREM -;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +; block0: +; lgbr %r1, %r2 +; lbr %r4, %r3 +; cite %r4, 0 +; dsgfr %r0, %r4 +; lgr %r2, %r0 +; br %r14 function %urem_i64(i64, i64) -> i64 { block0(v0: i64, v1: i64): @@ -305,12 +313,13 @@ block0(v0: i64, v1: i64): return v2 } -; check: lghi %r0, 0 -; nextln: lgr %r1, %r2 -; nextln: cgite %r3, 0 -; nextln: dlgr %r0, %r3 -; nextln: lgr %r2, %r0 -; nextln: br %r14 +; block0: +; lghi %r0, 0 +; lgr %r1, %r2 +; cgite %r3, 0 +; dlgr %r0, %r3 +; lgr %r2, %r0 +; br %r14 function %urem_i32(i32, i32) -> i32 { block0(v0: i32, v1: i32): @@ -318,12 +327,13 @@ block0(v0: i32, v1: i32): return v2 } -; check: lhi %r0, 0 -; nextln: lr %r1, %r2 -; nextln: cite %r3, 0 -; nextln: dlr %r0, %r3 -; nextln: lr %r2, %r0 -; nextln: br %r14 +; block0: +; lhi %r0, 0 +; lgr %r1, %r2 +; cite %r3, 0 +; dlr %r0, %r3 +; lgr %r2, %r0 +; br %r14 function %urem_i16(i16, i16) -> i16 { block0(v0: i16, v1: i16): @@ -331,13 +341,14 @@ block0(v0: i16, v1: i16): return v2 } -; check: lhi %r0, 0 -; nextln: llhr %r1, %r2 -; nextln: llhr %r2, %r3 -; nextln: cite %r2, 0 -; nextln: dlr %r0, %r2 -; nextln: lr %r2, %r0 -; nextln: br %r14 +; block0: +; lhi %r0, 0 +; llhr %r1, %r2 +; llhr %r5, %r3 +; cite %r5, 0 +; dlr %r0, %r5 +; lgr %r2, %r0 +; br %r14 function %urem_i8(i8, i8) -> i8 { block0(v0: i8, v1: i8): @@ -345,11 +356,12 @@ block0(v0: i8, v1: i8): return v2 } -; check: lhi %r0, 0 -; nextln: llcr %r1, %r2 -; nextln: llcr %r2, %r3 -; nextln: cite %r2, 0 -; nextln: dlr %r0, %r2 -; nextln: lr %r2, %r0 -; nextln: br %r14 +; block0: +; lhi %r0, 0 +; llcr %r1, %r2 +; llcr %r5, %r3 +; cite %r5, 0 +; dlr %r0, %r5 +; lgr %r2, %r0 +; br %r14 diff --git a/cranelift/filetests/filetests/isa/s390x/fence.clif b/cranelift/filetests/filetests/isa/s390x/fence.clif index 9e97a9f12c..2439ec7a2e 100644 --- a/cranelift/filetests/filetests/isa/s390x/fence.clif +++ b/cranelift/filetests/filetests/isa/s390x/fence.clif @@ -1,4 +1,4 @@ -test compile +test compile precise-output target s390x ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; @@ -11,7 +11,7 @@ block0: return } -; check: bcr 14, 0 -; nextln: br %r14 - +; block0: +; bcr 14, 0 +; br %r14 diff --git a/cranelift/filetests/filetests/isa/s390x/floating-point.clif b/cranelift/filetests/filetests/isa/s390x/floating-point.clif index 6f95c82487..11a5292c8d 100644 --- a/cranelift/filetests/filetests/isa/s390x/floating-point.clif +++ b/cranelift/filetests/filetests/isa/s390x/floating-point.clif @@ -1,4 +1,4 @@ -test compile +test compile 
precise-output target s390x ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; @@ -14,8 +14,9 @@ block0: return v1 } -; check: bras %r1, 8 ; data.f32 0 ; le %f0, 0(%r1) -; nextln: br %r14 +; block0: +; bras %r1, 8 ; data.f32 0 ; le %f0, 0(%r1) +; br %r14 function %f64const_zero() -> f64 { block0: @@ -23,8 +24,9 @@ block0: return v1 } -; check: bras %r1, 12 ; data.f64 0 ; ld %f0, 0(%r1) -; nextln: br %r14 +; block0: +; bras %r1, 12 ; data.f64 0 ; ld %f0, 0(%r1) +; br %r14 function %f32const_one() -> f32 { block0: @@ -32,8 +34,9 @@ block0: return v1 } -; check: bras %r1, 8 ; data.f32 1 ; le %f0, 0(%r1) -; nextln: br %r14 +; block0: +; bras %r1, 8 ; data.f32 1 ; le %f0, 0(%r1) +; br %r14 function %f64const_one() -> f64 { block0: @@ -41,12 +44,9 @@ block0: return v1 } -; check: bras %r1, 12 ; data.f64 1 ; ld %f0, 0(%r1) -; nextln: br %r14 - -;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; -;; FADD -;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +; block0: +; bras %r1, 12 ; data.f64 1 ; ld %f0, 0(%r1) +; br %r14 function %fadd_f32(f32, f32) -> f32 { block0(v0: f32, v1: f32): @@ -54,8 +54,9 @@ block0(v0: f32, v1: f32): return v2 } -; check: aebr %f0, %f2 -; nextln: br %r14 +; block0: +; aebr %f0, %f2 +; br %r14 function %fadd_f64(f64, f64) -> f64 { block0(v0: f64, v1: f64): @@ -63,12 +64,9 @@ block0(v0: f64, v1: f64): return v2 } -; check: adbr %f0, %f2 -; nextln: br %r14 - -;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; -;; FSUB -;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +; block0: +; adbr %f0, %f2 +; br %r14 function %fsub_f32(f32, f32) -> f32 { block0(v0: f32, v1: f32): @@ -76,8 +74,9 @@ block0(v0: f32, v1: f32): return v2 } -; check: sebr %f0, %f2 -; nextln: br %r14 +; block0: +; sebr %f0, %f2 +; br %r14 function %fsub_f64(f64, f64) -> f64 { block0(v0: f64, v1: f64): @@ -85,12 +84,9 @@ block0(v0: f64, v1: f64): return v2 } -; check: sdbr %f0, %f2 -; nextln: br %r14 - -;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; -;; FMUL -;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +; block0: +; sdbr %f0, %f2 +; br %r14 function %fmul_f32(f32, f32) -> f32 { block0(v0: f32, v1: f32): @@ -98,8 +94,9 @@ block0(v0: f32, v1: f32): return v2 } -; check: meebr %f0, %f2 -; nextln: br %r14 +; block0: +; meebr %f0, %f2 +; br %r14 function %fmul_f64(f64, f64) -> f64 { block0(v0: f64, v1: f64): @@ -107,12 +104,9 @@ block0(v0: f64, v1: f64): return v2 } -; check: mdbr %f0, %f2 -; nextln: br %r14 - -;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; -;; FDIV -;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +; block0: +; mdbr %f0, %f2 +; br %r14 function %fdiv_f32(f32, f32) -> f32 { block0(v0: f32, v1: f32): @@ -120,8 +114,9 @@ block0(v0: f32, v1: f32): return v2 } -; check: debr %f0, %f2 -; nextln: br %r14 +; block0: +; debr %f0, %f2 +; br %r14 function %fdiv_f64(f64, f64) -> f64 { block0(v0: f64, v1: f64): @@ -129,12 +124,9 @@ block0(v0: f64, v1: f64): return v2 } -; check: ddbr %f0, %f2 -; nextln: br %r14 - -;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; -;; FMIN -;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +; block0: +; ddbr %f0, %f2 +; br %r14 function %fmin_f32(f32, f32) -> f32 { block0(v0: f32, v1: f32): @@ -142,8 +134,9 @@ block0(v0: f32, v1: f32): return v2 } -; check: wfminsb %f0, %f0, %f2, 1 -; nextln: br %r14 +; block0: +; wfminsb %f0, %f0, %f2, 1 +; br %r14 function %fmin_f64(f64, f64) -> f64 { block0(v0: f64, v1: f64): @@ -151,12 +144,9 @@ block0(v0: f64, v1: f64): return v2 } -; check: wfmindb %f0, %f0, %f2, 1 -; nextln: br %r14 - -;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; -;; FMAX -;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +; 
block0: +; wfmindb %f0, %f0, %f2, 1 +; br %r14 function %fmax_f32(f32, f32) -> f32 { block0(v0: f32, v1: f32): @@ -164,8 +154,9 @@ block0(v0: f32, v1: f32): return v2 } -; check: wfmaxsb %f0, %f0, %f2, 1 -; nextln: br %r14 +; block0: +; wfmaxsb %f0, %f0, %f2, 1 +; br %r14 function %fmax_f64(f64, f64) -> f64 { block0(v0: f64, v1: f64): @@ -173,12 +164,9 @@ block0(v0: f64, v1: f64): return v2 } -; check: wfmaxdb %f0, %f0, %f2, 1 -; nextln: br %r14 - -;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; -;; SQRT -;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +; block0: +; wfmaxdb %f0, %f0, %f2, 1 +; br %r14 function %sqrt_f32(f32) -> f32 { block0(v0: f32): @@ -186,8 +174,9 @@ block0(v0: f32): return v1 } -; check: sqebr %f0, %f0 -; nextln: br %r14 +; block0: +; sqebr %f0, %f0 +; br %r14 function %sqrt_f64(f64) -> f64 { block0(v0: f64): @@ -195,12 +184,9 @@ block0(v0: f64): return v1 } -; check: sqdbr %f0, %f0 -; nextln: br %r14 - -;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; -;; FABS -;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +; block0: +; sqdbr %f0, %f0 +; br %r14 function %fabs_f32(f32) -> f32 { block0(v0: f32): @@ -208,8 +194,9 @@ block0(v0: f32): return v1 } -; check: lpebr %f0, %f0 -; nextln: br %r14 +; block0: +; lpebr %f0, %f0 +; br %r14 function %fabs_f64(f64) -> f64 { block0(v0: f64): @@ -217,12 +204,9 @@ block0(v0: f64): return v1 } -; check: lpdbr %f0, %f0 -; nextln: br %r14 - -;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; -;; FNEG -;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +; block0: +; lpdbr %f0, %f0 +; br %r14 function %fneg_f32(f32) -> f32 { block0(v0: f32): @@ -230,8 +214,9 @@ block0(v0: f32): return v1 } -; check: lcebr %f0, %f0 -; nextln: br %r14 +; block0: +; lcebr %f0, %f0 +; br %r14 function %fneg_f64(f64) -> f64 { block0(v0: f64): @@ -239,12 +224,9 @@ block0(v0: f64): return v1 } -; check: lcdbr %f0, %f0 -; nextln: br %r14 - -;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; -;; FPROMOTE/FDEMOTE -;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +; block0: +; lcdbr %f0, %f0 +; br %r14 function %fpromote_f32(f32) -> f64 { block0(v0: f32): @@ -252,8 +234,9 @@ block0(v0: f32): return v1 } -; check: ldebr %f0, %f0 -; nextln: br %r14 +; block0: +; ldebr %f0, %f0 +; br %r14 function %fdemote_f64(f64) -> f32 { block0(v0: f64): @@ -261,12 +244,9 @@ block0(v0: f64): return v1 } -; check: ledbr %f0, %f0 -; nextln: br %r14 - -;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; -;; CEIL -;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +; block0: +; ledbr %f0, %f0 +; br %r14 function %ceil_f32(f32) -> f32 { block0(v0: f32): @@ -274,8 +254,9 @@ block0(v0: f32): return v1 } -; check: fiebr %f0, %f0, 6 -; nextln: br %r14 +; block0: +; fiebr %f0, %f0, 6 +; br %r14 function %ceil_f64(f64) -> f64 { block0(v0: f64): @@ -283,12 +264,9 @@ block0(v0: f64): return v1 } -; check: fidbr %f0, %f0, 6 -; nextln: br %r14 - -;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; -;; FLOOR -;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +; block0: +; fidbr %f0, %f0, 6 +; br %r14 function %floor_f32(f32) -> f32 { block0(v0: f32): @@ -296,8 +274,9 @@ block0(v0: f32): return v1 } -; check: fiebr %f0, %f0, 7 -; nextln: br %r14 +; block0: +; fiebr %f0, %f0, 7 +; br %r14 function %floor_f64(f64) -> f64 { block0(v0: f64): @@ -305,12 +284,9 @@ block0(v0: f64): return v1 } -; check: fidbr %f0, %f0, 7 -; nextln: br %r14 - -;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; -;; TRUNC -;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +; block0: +; fidbr %f0, %f0, 7 +; br %r14 function %trunc_f32(f32) -> f32 { block0(v0: f32): @@ -318,8 +294,9 @@ block0(v0: f32): return v1 
} -; check: fiebr %f0, %f0, 5 -; nextln: br %r14 +; block0: +; fiebr %f0, %f0, 5 +; br %r14 function %trunc_f64(f64) -> f64 { block0(v0: f64): @@ -327,12 +304,9 @@ block0(v0: f64): return v1 } -; check: fidbr %f0, %f0, 5 -; nextln: br %r14 - -;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; -;; NEAREST -;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +; block0: +; fidbr %f0, %f0, 5 +; br %r14 function %nearest_f32(f32) -> f32 { block0(v0: f32): @@ -340,8 +314,9 @@ block0(v0: f32): return v1 } -; check: fiebr %f0, %f0, 4 -; nextln: br %r14 +; block0: +; fiebr %f0, %f0, 4 +; br %r14 function %nearest_f64(f64) -> f64 { block0(v0: f64): @@ -349,12 +324,9 @@ block0(v0: f64): return v1 } -; check: fidbr %f0, %f0, 4 -; nextln: br %r14 - -;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; -;; FMA -;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +; block0: +; fidbr %f0, %f0, 4 +; br %r14 function %fma_f32(f32, f32, f32) -> f32 { block0(v0: f32, v1: f32, v2: f32): @@ -362,11 +334,11 @@ block0(v0: f32, v1: f32, v2: f32): return v3 } -; FIXME: regalloc - -; check: maebr %f4, %f0, %f2 -; nextln: ler %f0, %f4 -; nextln: br %r14 +; block0: +; ldr %f1, %f0 +; ldr %f0, %f4 +; maebr %f0, %f1, %f2 +; br %r14 function %fma_f64(f64, f64, f64) -> f64 { block0(v0: f64, v1: f64, v2: f64): @@ -374,13 +346,11 @@ block0(v0: f64, v1: f64, v2: f64): return v3 } -; check: madbr %f4, %f0, %f2 -; nextln: ldr %f0, %f4 -; nextln: br %r14 - -;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; -;; FCOPYSIGN -;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +; block0: +; ldr %f1, %f0 +; ldr %f0, %f4 +; madbr %f0, %f1, %f2 +; br %r14 function %fcopysign_f32(f32, f32) -> f32 { block0(v0: f32, v1: f32): @@ -388,8 +358,9 @@ block0(v0: f32, v1: f32): return v2 } -; check: cpsdr %f0, %f2, %f0 -; nextln: br %r14 +; block0: +; cpsdr %f0, %f2, %f0 +; br %r14 function %fcopysign_f64(f64, f64) -> f64 { block0(v0: f64, v1: f64): @@ -397,12 +368,9 @@ block0(v0: f64, v1: f64): return v2 } -; check: cpsdr %f0, %f2, %f0 -; nextln: br %r14 - -;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; -;; FCVT_TO_UINT/FCVT_TO_SINT -;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +; block0: +; cpsdr %f0, %f2, %f0 +; br %r14 function %fcvt_to_uint_f32_i32(f32) -> i32 { block0(v0: f32): @@ -410,11 +378,12 @@ block0(v0: f32): return v1 } -; check: cebr %f0, %f0 -; nextln: jno 6 ; trap -; nextln: clfebr %r2, 5, %f0, 0 -; nextln: jno 6 ; trap -; nextln: br %r14 +; block0: +; cebr %f0, %f0 +; jno 6 ; trap +; clfebr %r2, 5, %f0, 0 +; jno 6 ; trap +; br %r14 function %fcvt_to_sint_f32_i32(f32) -> i32 { block0(v0: f32): @@ -422,11 +391,12 @@ block0(v0: f32): return v1 } -; check: cebr %f0, %f0 -; nextln: jno 6 ; trap -; nextln: cfebra %r2, 5, %f0, 0 -; nextln: jno 6 ; trap -; nextln: br %r14 +; block0: +; cebr %f0, %f0 +; jno 6 ; trap +; cfebra %r2, 5, %f0, 0 +; jno 6 ; trap +; br %r14 function %fcvt_to_uint_f32_i64(f32) -> i64 { block0(v0: f32): @@ -434,11 +404,12 @@ block0(v0: f32): return v1 } -; check: cebr %f0, %f0 -; nextln: jno 6 ; trap -; nextln: clgebr %r2, 5, %f0, 0 -; nextln: jno 6 ; trap -; nextln: br %r14 +; block0: +; cebr %f0, %f0 +; jno 6 ; trap +; clgebr %r2, 5, %f0, 0 +; jno 6 ; trap +; br %r14 function %fcvt_to_sint_f32_i64(f32) -> i64 { block0(v0: f32): @@ -446,11 +417,12 @@ block0(v0: f32): return v1 } -; check: cebr %f0, %f0 -; nextln: jno 6 ; trap -; nextln: cgebra %r2, 5, %f0, 0 -; nextln: jno 6 ; trap -; nextln: br %r14 +; block0: +; cebr %f0, %f0 +; jno 6 ; trap +; cgebra %r2, 5, %f0, 0 +; jno 6 ; trap +; br %r14 function %fcvt_to_uint_f64_i32(f64) -> i32 { block0(v0: f64): @@ 
-458,11 +430,12 @@ block0(v0: f64): return v1 } -; check: cdbr %f0, %f0 -; nextln: jno 6 ; trap -; nextln: clfdbr %r2, 5, %f0, 0 -; nextln: jno 6 ; trap -; nextln: br %r14 +; block0: +; cdbr %f0, %f0 +; jno 6 ; trap +; clfdbr %r2, 5, %f0, 0 +; jno 6 ; trap +; br %r14 function %fcvt_to_sint_f64_i32(f64) -> i32 { block0(v0: f64): @@ -470,11 +443,12 @@ block0(v0: f64): return v1 } -; check: cdbr %f0, %f0 -; nextln: jno 6 ; trap -; nextln: cfdbra %r2, 5, %f0, 0 -; nextln: jno 6 ; trap -; nextln: br %r14 +; block0: +; cdbr %f0, %f0 +; jno 6 ; trap +; cfdbra %r2, 5, %f0, 0 +; jno 6 ; trap +; br %r14 function %fcvt_to_uint_f64_i64(f64) -> i64 { block0(v0: f64): @@ -482,11 +456,12 @@ block0(v0: f64): return v1 } -; check: cdbr %f0, %f0 -; nextln: jno 6 ; trap -; nextln: clgdbr %r2, 5, %f0, 0 -; nextln: jno 6 ; trap -; nextln: br %r14 +; block0: +; cdbr %f0, %f0 +; jno 6 ; trap +; clgdbr %r2, 5, %f0, 0 +; jno 6 ; trap +; br %r14 function %fcvt_to_sint_f64_i64(f64) -> i64 { block0(v0: f64): @@ -494,15 +469,12 @@ block0(v0: f64): return v1 } -; check: cdbr %f0, %f0 -; nextln: jno 6 ; trap -; nextln: cgdbra %r2, 5, %f0, 0 -; nextln: jno 6 ; trap -; nextln: br %r14 - -;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; -;; FCVT_FROM_UINT/FCVT_FROM_SINT -;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +; block0: +; cdbr %f0, %f0 +; jno 6 ; trap +; cgdbra %r2, 5, %f0, 0 +; jno 6 ; trap +; br %r14 function %fcvt_from_uint_i32_f32(i32) -> f32 { block0(v0: i32): @@ -510,8 +482,9 @@ block0(v0: i32): return v1 } -; check: celfbr %f0, 0, %r2, 0 -; nextln: br %r14 +; block0: +; celfbr %f0, 0, %r2, 0 +; br %r14 function %fcvt_from_sint_i32_f32(i32) -> f32 { block0(v0: i32): @@ -519,8 +492,9 @@ block0(v0: i32): return v1 } -; check: cefbra %f0, 0, %r2, 0 -; nextln: br %r14 +; block0: +; cefbra %f0, 0, %r2, 0 +; br %r14 function %fcvt_from_uint_i64_f32(i64) -> f32 { block0(v0: i64): @@ -528,8 +502,9 @@ block0(v0: i64): return v1 } -; check: celgbr %f0, 0, %r2, 0 -; nextln: br %r14 +; block0: +; celgbr %f0, 0, %r2, 0 +; br %r14 function %fcvt_from_sint_i64_f32(i64) -> f32 { block0(v0: i64): @@ -537,8 +512,9 @@ block0(v0: i64): return v1 } -; check: cegbra %f0, 0, %r2, 0 -; nextln: br %r14 +; block0: +; cegbra %f0, 0, %r2, 0 +; br %r14 function %fcvt_from_uint_i32_f64(i32) -> f64 { block0(v0: i32): @@ -546,8 +522,9 @@ block0(v0: i32): return v1 } -; check: cdlfbr %f0, 0, %r2, 0 -; nextln: br %r14 +; block0: +; cdlfbr %f0, 0, %r2, 0 +; br %r14 function %fcvt_from_sint_i32_f64(i32) -> f64 { block0(v0: i32): @@ -555,8 +532,9 @@ block0(v0: i32): return v1 } -; check: cdfbra %f0, 0, %r2, 0 -; nextln: br %r14 +; block0: +; cdfbra %f0, 0, %r2, 0 +; br %r14 function %fcvt_from_uint_i64_f64(i64) -> f64 { block0(v0: i64): @@ -564,8 +542,9 @@ block0(v0: i64): return v1 } -; check: cdlgbr %f0, 0, %r2, 0 -; nextln: br %r14 +; block0: +; cdlgbr %f0, 0, %r2, 0 +; br %r14 function %fcvt_from_sint_i64_f64(i64) -> f64 { block0(v0: i64): @@ -573,12 +552,9 @@ block0(v0: i64): return v1 } -; check: cdgbra %f0, 0, %r2, 0 -; nextln: br %r14 - -;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; -;; FCVT_TO_UINT_SAT/FCVT_TO_SINT_SAT -;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +; block0: +; cdgbra %f0, 0, %r2, 0 +; br %r14 function %fcvt_to_uint_sat_f32_i32(f32) -> i32 { block0(v0: f32): @@ -586,10 +562,11 @@ block0(v0: f32): return v1 } -; check: clfebr %r2, 5, %f0, 0 -; nextln: cebr %f0, %f0 -; nextln: lochio %r2, 0 -; nextln: br %r14 +; block0: +; clfebr %r2, 5, %f0, 0 +; cebr %f0, %f0 +; lochio %r2, 0 +; br %r14 function %fcvt_to_sint_sat_f32_i32(f32) -> 
i32 { block0(v0: f32): @@ -597,10 +574,11 @@ block0(v0: f32): return v1 } -; check: cfebra %r2, 5, %f0, 0 -; nextln: cebr %f0, %f0 -; nextln: lochio %r2, 0 -; nextln: br %r14 +; block0: +; cfebra %r2, 5, %f0, 0 +; cebr %f0, %f0 +; lochio %r2, 0 +; br %r14 function %fcvt_to_uint_sat_f32_i64(f32) -> i64 { block0(v0: f32): @@ -608,10 +586,11 @@ block0(v0: f32): return v1 } -; check: clgebr %r2, 5, %f0, 0 -; nextln: cebr %f0, %f0 -; nextln: locghio %r2, 0 -; nextln: br %r14 +; block0: +; clgebr %r2, 5, %f0, 0 +; cebr %f0, %f0 +; locghio %r2, 0 +; br %r14 function %fcvt_to_sint_sat_f32_i64(f32) -> i64 { block0(v0: f32): @@ -619,10 +598,11 @@ block0(v0: f32): return v1 } -; check: cgebra %r2, 5, %f0, 0 -; nextln: cebr %f0, %f0 -; nextln: locghio %r2, 0 -; nextln: br %r14 +; block0: +; cgebra %r2, 5, %f0, 0 +; cebr %f0, %f0 +; locghio %r2, 0 +; br %r14 function %fcvt_to_uint_sat_f64_i32(f64) -> i32 { block0(v0: f64): @@ -630,10 +610,11 @@ block0(v0: f64): return v1 } -; check: clfdbr %r2, 5, %f0, 0 -; nextln: cdbr %f0, %f0 -; nextln: lochio %r2, 0 -; nextln: br %r14 +; block0: +; clfdbr %r2, 5, %f0, 0 +; cdbr %f0, %f0 +; lochio %r2, 0 +; br %r14 function %fcvt_to_sint_sat_f64_i32(f64) -> i32 { block0(v0: f64): @@ -641,10 +622,11 @@ block0(v0: f64): return v1 } -; check: cfdbra %r2, 5, %f0, 0 -; nextln: cdbr %f0, %f0 -; nextln: lochio %r2, 0 -; nextln: br %r14 +; block0: +; cfdbra %r2, 5, %f0, 0 +; cdbr %f0, %f0 +; lochio %r2, 0 +; br %r14 function %fcvt_to_uint_sat_f64_i64(f64) -> i64 { block0(v0: f64): @@ -652,10 +634,11 @@ block0(v0: f64): return v1 } -; check: clgdbr %r2, 5, %f0, 0 -; nextln: cdbr %f0, %f0 -; nextln: locghio %r2, 0 -; nextln: br %r14 +; block0: +; clgdbr %r2, 5, %f0, 0 +; cdbr %f0, %f0 +; locghio %r2, 0 +; br %r14 function %fcvt_to_sint_sat_f64_i64(f64) -> i64 { block0(v0: f64): @@ -663,14 +646,11 @@ block0(v0: f64): return v1 } -; check: cgdbra %r2, 5, %f0, 0 -; nextln: cdbr %f0, %f0 -; nextln: locghio %r2, 0 -; nextln: br %r14 - -;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; -;; BITCAST -;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +; block0: +; cgdbra %r2, 5, %f0, 0 +; cdbr %f0, %f0 +; locghio %r2, 0 +; br %r14 function %bitcast_i64_f64(i64) -> f64 { block0(v0: i64): @@ -678,8 +658,9 @@ block0(v0: i64): return v1 } -; check: ldgr %f0, %r2 -; nextln: br %r14 +; block0: +; ldgr %f0, %r2 +; br %r14 function %bitcast_f64_i64(f64) -> i64 { block0(v0: f64): @@ -687,8 +668,9 @@ block0(v0: f64): return v1 } -; check: lgdr %r2, %f0 -; nextln: br %r14 +; block0: +; lgdr %r2, %f0 +; br %r14 function %bitcast_i32_f32(i32) -> f32 { block0(v0: i32): @@ -696,9 +678,10 @@ block0(v0: i32): return v1 } -; check: sllg %r2, %r2, 32 -; nextln: ldgr %f0, %r2 -; nextln: br %r14 +; block0: +; sllg %r5, %r2, 32 +; ldgr %f0, %r5 +; br %r14 function %bitcast_f32_i32(f32) -> i32 { block0(v0: f32): @@ -706,6 +689,8 @@ block0(v0: f32): return v1 } -; check: lgdr %r2, %f0 -; nextln: srlg %r2, %r2, 32 -; nextln: br %r14 +; block0: +; lgdr %r5, %f0 +; srlg %r2, %r5, 32 +; br %r14 + diff --git a/cranelift/filetests/filetests/isa/s390x/fpmem-arch13.clif b/cranelift/filetests/filetests/isa/s390x/fpmem-arch13.clif index 8bad0ca09a..76224768bc 100644 --- a/cranelift/filetests/filetests/isa/s390x/fpmem-arch13.clif +++ b/cranelift/filetests/filetests/isa/s390x/fpmem-arch13.clif @@ -1,4 +1,4 @@ -test compile +test compile precise-output target s390x arch13 function %load_f64_little(i64) -> f64 { @@ -7,8 +7,9 @@ block0(v0: i64): return v1 } -; check: vlebrg %f0, 0(%r2), 0 -; nextln: br %r14 +; block0: +; vlebrg %f0, 
0(%r2), 0 +; br %r14 function %load_f32_little(i64) -> f32 { block0(v0: i64): @@ -16,8 +17,9 @@ block0(v0: i64): return v1 } -; check: vlebrf %f0, 0(%r2), 0 -; nextln: br %r14 +; block0: +; vlebrf %f0, 0(%r2), 0 +; br %r14 function %store_f64_little(f64, i64) { block0(v0: f64, v1: i64): @@ -25,8 +27,9 @@ block0(v0: f64, v1: i64): return } -; check: vstebrg %f0, 0(%r2), 0 -; nextln: br %r14 +; block0: +; vstebrg %f0, 0(%r2), 0 +; br %r14 function %store_f32_little(f32, i64) { block0(v0: f32, v1: i64): @@ -34,6 +37,7 @@ block0(v0: f32, v1: i64): return } -; check: vstebrf %f0, 0(%r2), 0 -; nextln: br %r14 +; block0: +; vstebrf %f0, 0(%r2), 0 +; br %r14 diff --git a/cranelift/filetests/filetests/isa/s390x/fpmem.clif b/cranelift/filetests/filetests/isa/s390x/fpmem.clif index ddf37f84f2..49b1ff1aeb 100644 --- a/cranelift/filetests/filetests/isa/s390x/fpmem.clif +++ b/cranelift/filetests/filetests/isa/s390x/fpmem.clif @@ -1,4 +1,4 @@ -test compile +test compile precise-output target s390x function %load_f64(i64) -> f64 { @@ -7,8 +7,9 @@ block0(v0: i64): return v1 } -; check: ld %f0, 0(%r2) -; nextln: br %r14 +; block0: +; ld %f0, 0(%r2) +; br %r14 function %load_f32(i64) -> f32 { block0(v0: i64): @@ -16,8 +17,9 @@ block0(v0: i64): return v1 } -; check: le %f0, 0(%r2) -; nextln: br %r14 +; block0: +; le %f0, 0(%r2) +; br %r14 function %load_f64_little(i64) -> f64 { block0(v0: i64): @@ -25,9 +27,10 @@ block0(v0: i64): return v1 } -; check: lrvg %r2, 0(%r2) -; nextln: ldgr %f0, %r2 -; nextln: br %r14 +; block0: +; lrvg %r5, 0(%r2) +; ldgr %f0, %r5 +; br %r14 function %load_f32_little(i64) -> f32 { block0(v0: i64): @@ -35,10 +38,11 @@ block0(v0: i64): return v1 } -; check: lrv %r2, 0(%r2) -; nextln: sllg %r2, %r2, 32 -; nextln: ldgr %f0, %r2 -; nextln: br %r14 +; block0: +; lrv %r5, 0(%r2) +; sllg %r3, %r5, 32 +; ldgr %f0, %r3 +; br %r14 function %store_f64(f64, i64) { block0(v0: f64, v1: i64): @@ -46,8 +50,9 @@ block0(v0: f64, v1: i64): return } -; check: std %f0, 0(%r2) -; nextln: br %r14 +; block0: +; std %f0, 0(%r2) +; br %r14 function %store_f32(f32, i64) { block0(v0: f32, v1: i64): @@ -55,8 +60,9 @@ block0(v0: f32, v1: i64): return } -; check: ste %f0, 0(%r2) -; nextln: br %r14 +; block0: +; ste %f0, 0(%r2) +; br %r14 function %store_f64_little(f64, i64) { block0(v0: f64, v1: i64): @@ -64,9 +70,10 @@ block0(v0: f64, v1: i64): return } -; check: lgdr %r3, %f0 -; nextln: strvg %r3, 0(%r2) -; nextln: br %r14 +; block0: +; lgdr %r3, %f0 +; strvg %r3, 0(%r2) +; br %r14 function %store_f32_little(f32, i64) { block0(v0: f32, v1: i64): @@ -74,8 +81,9 @@ block0(v0: f32, v1: i64): return } -; check: lgdr %r3, %f0 -; nextln: srlg %r3, %r3, 32 -; nextln: strv %r3, 0(%r2) -; nextln: br %r14 +; block0: +; lgdr %r3, %f0 +; srlg %r4, %r3, 32 +; strv %r4, 0(%r2) +; br %r14 diff --git a/cranelift/filetests/filetests/isa/s390x/heap_addr.clif b/cranelift/filetests/filetests/isa/s390x/heap_addr.clif index c07d5f66a7..7793f4df75 100644 --- a/cranelift/filetests/filetests/isa/s390x/heap_addr.clif +++ b/cranelift/filetests/filetests/isa/s390x/heap_addr.clif @@ -1,4 +1,4 @@ -test compile +test compile precise-output target s390x function %dynamic_heap_check(i64 vmctx, i32) -> i64 { @@ -11,20 +11,20 @@ block0(v0: i64, v1: i32): return v2 } -; check: Block 0: -; check: llgfr %r3, %r3 -; nextln: lghi %r4, 0 -; nextln: ag %r4, 0(%r2) -; nextln: clgr %r3, %r4 -; nextln: jgnh label1 ; jg label2 -; check: Block 1: -; check: agr %r2, %r3 -; nextln: lghi %r5, 0 -; nextln: clgr %r3, %r4 -; nextln: locgrh %r2, %r5 -; nextln: br 
%r14 -; check: Block 2: -; check: trap +; block0: +; llgfr %r4, %r3 +; lghi %r3, 0 +; ag %r3, 0(%r2) +; clgr %r4, %r3 +; jgnh label1 ; jg label2 +; block1: +; agr %r2, %r4 +; lghi %r5, 0 +; clgr %r4, %r3 +; locgrh %r2, %r5 +; br %r14 +; block2: +; trap function %static_heap_check(i64 vmctx, i32) -> i64 { gv0 = vmctx @@ -35,15 +35,16 @@ block0(v0: i64, v1: i32): return v2 } -; check: Block 0: -; check: llgfr %r3, %r3 -; nextln: clgfi %r3, 65536 -; nextln: jgnh label1 ; jg label2 -; check: Block 1: -; check: agr %r2, %r3 -; nextln: lghi %r4, 0 -; nextln: clgfi %r3, 65536 -; nextln: locgrh %r2, %r4 -; nextln: br %r14 -; check: Block 2: -; check: trap +; block0: +; llgfr %r3, %r3 +; clgfi %r3, 65536 +; jgnh label1 ; jg label2 +; block1: +; agr %r2, %r3 +; lghi %r4, 0 +; clgfi %r3, 65536 +; locgrh %r2, %r4 +; br %r14 +; block2: +; trap + diff --git a/cranelift/filetests/filetests/isa/s390x/icmp.clif b/cranelift/filetests/filetests/isa/s390x/icmp.clif index 50c2120ac7..6d1c2b0ce1 100644 --- a/cranelift/filetests/filetests/isa/s390x/icmp.clif +++ b/cranelift/filetests/filetests/isa/s390x/icmp.clif @@ -1,4 +1,4 @@ -test compile +test compile precise-output target s390x function %icmp_slt_i64(i64, i64) -> b1 { @@ -7,10 +7,11 @@ block0(v0: i64, v1: i64): return v2 } -; check: cgr %r2, %r3 -; nextln: lhi %r2, 0 -; nextln: lochil %r2, 1 -; nextln: br %r14 +; block0: +; cgr %r2, %r3 +; lhi %r2, 0 +; lochil %r2, 1 +; br %r14 function %icmp_slt_i64_ext32(i64, i32) -> b1 { block0(v0: i64, v1: i32): @@ -19,10 +20,11 @@ block0(v0: i64, v1: i32): return v3 } -; check: cgfr %r2, %r3 -; nextln: lhi %r2, 0 -; nextln: lochil %r2, 1 -; nextln: br %r14 +; block0: +; cgfr %r2, %r3 +; lhi %r2, 0 +; lochil %r2, 1 +; br %r14 function %icmp_slt_i64_imm16(i64) -> b1 { block0(v0: i64): @@ -31,10 +33,11 @@ block0(v0: i64): return v2 } -; check: cghi %r2, 1 -; nextln: lhi %r2, 0 -; nextln: lochil %r2, 1 -; nextln: br %r14 +; block0: +; cghi %r2, 1 +; lhi %r2, 0 +; lochil %r2, 1 +; br %r14 function %icmp_slt_i64_imm32(i64) -> b1 { block0(v0: i64): @@ -43,10 +46,11 @@ block0(v0: i64): return v2 } -; check: cgfi %r2, 32768 -; nextln: lhi %r2, 0 -; nextln: lochil %r2, 1 -; nextln: br %r14 +; block0: +; cgfi %r2, 32768 +; lhi %r2, 0 +; lochil %r2, 1 +; br %r14 function %icmp_slt_i64_mem(i64, i64) -> b1 { block0(v0: i64, v1: i64): @@ -55,10 +59,11 @@ block0(v0: i64, v1: i64): return v3 } -; check: cg %r2, 0(%r3) -; nextln: lhi %r2, 0 -; nextln: lochil %r2, 1 -; nextln: br %r14 +; block0: +; cg %r2, 0(%r3) +; lhi %r2, 0 +; lochil %r2, 1 +; br %r14 function %icmp_slt_i64_sym(i64) -> b1 { gv0 = symbol colocated %sym @@ -69,10 +74,11 @@ block0(v0: i64): return v3 } -; check: cgrl %r2, %sym + 0 -; nextln: lhi %r2, 0 -; nextln: lochil %r2, 1 -; nextln: br %r14 +; block0: +; cgrl %r2, %sym + 0 +; lhi %r2, 0 +; lochil %r2, 1 +; br %r14 function %icmp_slt_i64_mem_ext16(i64, i64) -> b1 { block0(v0: i64, v1: i64): @@ -81,10 +87,11 @@ block0(v0: i64, v1: i64): return v3 } -; check: cgh %r2, 0(%r3) -; nextln: lhi %r2, 0 -; nextln: lochil %r2, 1 -; nextln: br %r14 +; block0: +; cgh %r2, 0(%r3) +; lhi %r2, 0 +; lochil %r2, 1 +; br %r14 function %icmp_slt_i64_sym_ext16(i64) -> b1 { gv0 = symbol colocated %sym @@ -95,10 +102,11 @@ block0(v0: i64): return v3 } -; check: cghrl %r2, %sym + 0 -; nextln: lhi %r2, 0 -; nextln: lochil %r2, 1 -; nextln: br %r14 +; block0: +; cghrl %r2, %sym + 0 +; lhi %r2, 0 +; lochil %r2, 1 +; br %r14 function %icmp_slt_i64_mem_ext32(i64, i64) -> b1 { block0(v0: i64, v1: i64): @@ -107,10 +115,11 @@ block0(v0: i64, v1: 
i64): return v3 } -; check: cgf %r2, 0(%r3) -; nextln: lhi %r2, 0 -; nextln: lochil %r2, 1 -; nextln: br %r14 +; block0: +; cgf %r2, 0(%r3) +; lhi %r2, 0 +; lochil %r2, 1 +; br %r14 function %icmp_slt_i64_sym_ext32(i64) -> b1 { gv0 = symbol colocated %sym @@ -121,10 +130,11 @@ block0(v0: i64): return v3 } -; check: cgfrl %r2, %sym + 0 -; nextln: lhi %r2, 0 -; nextln: lochil %r2, 1 -; nextln: br %r14 +; block0: +; cgfrl %r2, %sym + 0 +; lhi %r2, 0 +; lochil %r2, 1 +; br %r14 function %icmp_slt_i32(i32, i32) -> b1 { block0(v0: i32, v1: i32): @@ -132,10 +142,11 @@ block0(v0: i32, v1: i32): return v2 } -; check: cr %r2, %r3 -; nextln: lhi %r2, 0 -; nextln: lochil %r2, 1 -; nextln: br %r14 +; block0: +; cr %r2, %r3 +; lhi %r2, 0 +; lochil %r2, 1 +; br %r14 function %icmp_slt_i32_imm16(i32) -> b1 { block0(v0: i32): @@ -144,10 +155,11 @@ block0(v0: i32): return v2 } -; check: chi %r2, 1 -; nextln: lhi %r2, 0 -; nextln: lochil %r2, 1 -; nextln: br %r14 +; block0: +; chi %r2, 1 +; lhi %r2, 0 +; lochil %r2, 1 +; br %r14 function %icmp_slt_i32_imm(i32) -> b1 { block0(v0: i32): @@ -156,10 +168,11 @@ block0(v0: i32): return v2 } -; check: cfi %r2, 32768 -; nextln: lhi %r2, 0 -; nextln: lochil %r2, 1 -; nextln: br %r14 +; block0: +; cfi %r2, 32768 +; lhi %r2, 0 +; lochil %r2, 1 +; br %r14 function %icmp_slt_i32_mem(i32, i64) -> b1 { block0(v0: i32, v1: i64): @@ -168,10 +181,11 @@ block0(v0: i32, v1: i64): return v3 } -; check: c %r2, 0(%r3) -; nextln: lhi %r2, 0 -; nextln: lochil %r2, 1 -; nextln: br %r14 +; block0: +; c %r2, 0(%r3) +; lhi %r2, 0 +; lochil %r2, 1 +; br %r14 function %icmp_slt_i32_memoff(i32, i64) -> b1 { block0(v0: i32, v1: i64): @@ -180,10 +194,11 @@ block0(v0: i32, v1: i64): return v3 } -; check: cy %r2, 4096(%r3) -; nextln: lhi %r2, 0 -; nextln: lochil %r2, 1 -; nextln: br %r14 +; block0: +; cy %r2, 4096(%r3) +; lhi %r2, 0 +; lochil %r2, 1 +; br %r14 function %icmp_slt_i32_sym(i32) -> b1 { gv0 = symbol colocated %sym @@ -194,10 +209,11 @@ block0(v0: i32): return v3 } -; check: crl %r2, %sym + 0 -; nextln: lhi %r2, 0 -; nextln: lochil %r2, 1 -; nextln: br %r14 +; block0: +; crl %r2, %sym + 0 +; lhi %r2, 0 +; lochil %r2, 1 +; br %r14 function %icmp_slt_i32_mem_ext16(i32, i64) -> b1 { block0(v0: i32, v1: i64): @@ -206,10 +222,11 @@ block0(v0: i32, v1: i64): return v3 } -; check: ch %r2, 0(%r3) -; nextln: lhi %r2, 0 -; nextln: lochil %r2, 1 -; nextln: br %r14 +; block0: +; ch %r2, 0(%r3) +; lhi %r2, 0 +; lochil %r2, 1 +; br %r14 function %icmp_slt_i32_memoff_ext16(i32, i64) -> b1 { block0(v0: i32, v1: i64): @@ -218,10 +235,11 @@ block0(v0: i32, v1: i64): return v3 } -; check: chy %r2, 4096(%r3) -; nextln: lhi %r2, 0 -; nextln: lochil %r2, 1 -; nextln: br %r14 +; block0: +; chy %r2, 4096(%r3) +; lhi %r2, 0 +; lochil %r2, 1 +; br %r14 function %icmp_slt_i32_sym_ext16(i32) -> b1 { gv0 = symbol colocated %sym @@ -232,10 +250,11 @@ block0(v0: i32): return v3 } -; check: chrl %r2, %sym + 0 -; nextln: lhi %r2, 0 -; nextln: lochil %r2, 1 -; nextln: br %r14 +; block0: +; chrl %r2, %sym + 0 +; lhi %r2, 0 +; lochil %r2, 1 +; br %r14 function %icmp_slt_i16(i16, i16) -> b1 { block0(v0: i16, v1: i16): @@ -243,12 +262,14 @@ block0(v0: i16, v1: i16): return v2 } -; check: lhr %r2, %r2 -; nextln: lhr %r3, %r3 -; nextln: cr %r2, %r3 -; nextln: lhi %r2, 0 -; nextln: lochil %r2, 1 -; nextln: br %r14 +; block0: +; lgr %r5, %r3 +; lhr %r3, %r2 +; lhr %r5, %r5 +; cr %r3, %r5 +; lhi %r2, 0 +; lochil %r2, 1 +; br %r14 function %icmp_slt_i16_imm(i16) -> b1 { block0(v0: i16): @@ -257,11 +278,12 @@ block0(v0: i16): 
return v2 } -; check: lhr %r2, %r2 -; nextln: chi %r2, 1 -; nextln: lhi %r2, 0 -; nextln: lochil %r2, 1 -; nextln: br %r14 +; block0: +; lhr %r5, %r2 +; chi %r5, 1 +; lhi %r2, 0 +; lochil %r2, 1 +; br %r14 function %icmp_slt_i16_mem(i16, i64) -> b1 { block0(v0: i16, v1: i64): @@ -270,11 +292,12 @@ block0(v0: i16, v1: i64): return v3 } -; check: lhr %r2, %r2 -; nextln: ch %r2, 0(%r3) -; nextln: lhi %r2, 0 -; nextln: lochil %r2, 1 -; nextln: br %r14 +; block0: +; lhr %r4, %r2 +; ch %r4, 0(%r3) +; lhi %r2, 0 +; lochil %r2, 1 +; br %r14 function %icmp_slt_i16_sym(i16) -> b1 { gv0 = symbol colocated %sym @@ -285,11 +308,12 @@ block0(v0: i16): return v3 } -; check: lhr %r2, %r2 -; nextln: chrl %r2, %sym + 0 -; nextln: lhi %r2, 0 -; nextln: lochil %r2, 1 -; nextln: br %r14 +; block0: +; lhr %r5, %r2 +; chrl %r5, %sym + 0 +; lhi %r2, 0 +; lochil %r2, 1 +; br %r14 function %icmp_slt_i8(i8, i8) -> b1 { block0(v0: i8, v1: i8): @@ -297,12 +321,14 @@ block0(v0: i8, v1: i8): return v2 } -; check: lbr %r2, %r2 -; nextln: lbr %r3, %r3 -; nextln: cr %r2, %r3 -; nextln: lhi %r2, 0 -; nextln: lochil %r2, 1 -; nextln: br %r14 +; block0: +; lgr %r5, %r3 +; lbr %r3, %r2 +; lbr %r5, %r5 +; cr %r3, %r5 +; lhi %r2, 0 +; lochil %r2, 1 +; br %r14 function %icmp_slt_i8_imm(i8) -> b1 { block0(v0: i8): @@ -311,11 +337,12 @@ block0(v0: i8): return v2 } -; check: lbr %r2, %r2 -; nextln: chi %r2, 1 -; nextln: lhi %r2, 0 -; nextln: lochil %r2, 1 -; nextln: br %r14 +; block0: +; lbr %r5, %r2 +; chi %r5, 1 +; lhi %r2, 0 +; lochil %r2, 1 +; br %r14 function %icmp_slt_i8_mem(i8, i64) -> b1 { block0(v0: i8, v1: i64): @@ -324,12 +351,14 @@ block0(v0: i8, v1: i64): return v3 } -; check: lbr %r2, %r2 -; nextln: lb %r3, 0(%r3) -; nextln: cr %r2, %r3 -; nextln: lhi %r2, 0 -; nextln: lochil %r2, 1 -; nextln: br %r14 +; block0: +; lgr %r5, %r3 +; lbr %r3, %r2 +; lb %r5, 0(%r5) +; cr %r3, %r5 +; lhi %r2, 0 +; lochil %r2, 1 +; br %r14 function %icmp_ult_i64(i64, i64) -> b1 { block0(v0: i64, v1: i64): @@ -337,10 +366,11 @@ block0(v0: i64, v1: i64): return v2 } -; check: clgr %r2, %r3 -; nextln: lhi %r2, 0 -; nextln: lochil %r2, 1 -; nextln: br %r14 +; block0: +; clgr %r2, %r3 +; lhi %r2, 0 +; lochil %r2, 1 +; br %r14 function %icmp_ult_i64_ext32(i64, i32) -> b1 { block0(v0: i64, v1: i32): @@ -349,10 +379,11 @@ block0(v0: i64, v1: i32): return v3 } -; check: clgfr %r2, %r3 -; nextln: lhi %r2, 0 -; nextln: lochil %r2, 1 -; nextln: br %r14 +; block0: +; clgfr %r2, %r3 +; lhi %r2, 0 +; lochil %r2, 1 +; br %r14 function %icmp_ult_i64_imm(i64) -> b1 { block0(v0: i64): @@ -361,10 +392,11 @@ block0(v0: i64): return v2 } -; check: clgfi %r2, 1 -; nextln: lhi %r2, 0 -; nextln: lochil %r2, 1 -; nextln: br %r14 +; block0: +; clgfi %r2, 1 +; lhi %r2, 0 +; lochil %r2, 1 +; br %r14 function %icmp_ult_i64_mem(i64, i64) -> b1 { block0(v0: i64, v1: i64): @@ -373,10 +405,11 @@ block0(v0: i64, v1: i64): return v3 } -; check: clg %r2, 0(%r3) -; nextln: lhi %r2, 0 -; nextln: lochil %r2, 1 -; nextln: br %r14 +; block0: +; clg %r2, 0(%r3) +; lhi %r2, 0 +; lochil %r2, 1 +; br %r14 function %icmp_ult_i64_sym(i64) -> b1 { gv0 = symbol colocated %sym @@ -387,10 +420,11 @@ block0(v0: i64): return v3 } -; check: clgrl %r2, %sym + 0 -; nextln: lhi %r2, 0 -; nextln: lochil %r2, 1 -; nextln: br %r14 +; block0: +; clgrl %r2, %sym + 0 +; lhi %r2, 0 +; lochil %r2, 1 +; br %r14 function %icmp_ult_i64_mem_ext32(i64, i64) -> b1 { block0(v0: i64, v1: i64): @@ -399,10 +433,11 @@ block0(v0: i64, v1: i64): return v3 } -; check: clgf %r2, 0(%r3) -; nextln: lhi %r2, 0 -; nextln: 
lochil %r2, 1 -; nextln: br %r14 +; block0: +; clgf %r2, 0(%r3) +; lhi %r2, 0 +; lochil %r2, 1 +; br %r14 function %icmp_ult_i64_sym_ext32(i64) -> b1 { gv0 = symbol colocated %sym @@ -413,10 +448,11 @@ block0(v0: i64): return v3 } -; check: clgfrl %r2, %sym + 0 -; nextln: lhi %r2, 0 -; nextln: lochil %r2, 1 -; nextln: br %r14 +; block0: +; clgfrl %r2, %sym + 0 +; lhi %r2, 0 +; lochil %r2, 1 +; br %r14 function %icmp_ult_i64_mem_ext16(i64, i64) -> b1 { block0(v0: i64, v1: i64): @@ -425,11 +461,12 @@ block0(v0: i64, v1: i64): return v3 } -; check: llgh %r3, 0(%r3) -; check: clgr %r2, %r3 -; nextln: lhi %r2, 0 -; nextln: lochil %r2, 1 -; nextln: br %r14 +; block0: +; llgh %r4, 0(%r3) +; clgr %r2, %r4 +; lhi %r2, 0 +; lochil %r2, 1 +; br %r14 function %icmp_ult_i64_sym_ext16(i64) -> b1 { gv0 = symbol colocated %sym @@ -440,10 +477,11 @@ block0(v0: i64): return v3 } -; check: clghrl %r2, %sym + 0 -; nextln: lhi %r2, 0 -; nextln: lochil %r2, 1 -; nextln: br %r14 +; block0: +; clghrl %r2, %sym + 0 +; lhi %r2, 0 +; lochil %r2, 1 +; br %r14 function %icmp_ult_i32(i32, i32) -> b1 { block0(v0: i32, v1: i32): @@ -451,10 +489,11 @@ block0(v0: i32, v1: i32): return v2 } -; check: clr %r2, %r3 -; nextln: lhi %r2, 0 -; nextln: lochil %r2, 1 -; nextln: br %r14 +; block0: +; clr %r2, %r3 +; lhi %r2, 0 +; lochil %r2, 1 +; br %r14 function %icmp_ult_i32_imm(i32) -> b1 { block0(v0: i32): @@ -463,10 +502,11 @@ block0(v0: i32): return v2 } -; check: clfi %r2, 1 -; nextln: lhi %r2, 0 -; nextln: lochil %r2, 1 -; nextln: br %r14 +; block0: +; clfi %r2, 1 +; lhi %r2, 0 +; lochil %r2, 1 +; br %r14 function %icmp_ult_i32_mem(i32, i64) -> b1 { block0(v0: i32, v1: i64): @@ -475,10 +515,11 @@ block0(v0: i32, v1: i64): return v3 } -; check: cl %r2, 0(%r3) -; nextln: lhi %r2, 0 -; nextln: lochil %r2, 1 -; nextln: br %r14 +; block0: +; cl %r2, 0(%r3) +; lhi %r2, 0 +; lochil %r2, 1 +; br %r14 function %icmp_ult_i32_memoff(i32, i64) -> b1 { block0(v0: i32, v1: i64): @@ -487,10 +528,11 @@ block0(v0: i32, v1: i64): return v3 } -; check: cly %r2, 4096(%r3) -; nextln: lhi %r2, 0 -; nextln: lochil %r2, 1 -; nextln: br %r14 +; block0: +; cly %r2, 4096(%r3) +; lhi %r2, 0 +; lochil %r2, 1 +; br %r14 function %icmp_ult_i32_sym(i32) -> b1 { gv0 = symbol colocated %sym @@ -501,10 +543,11 @@ block0(v0: i32): return v3 } -; check: clrl %r2, %sym + 0 -; nextln: lhi %r2, 0 -; nextln: lochil %r2, 1 -; nextln: br %r14 +; block0: +; clrl %r2, %sym + 0 +; lhi %r2, 0 +; lochil %r2, 1 +; br %r14 function %icmp_ult_i32_mem_ext16(i32, i64) -> b1 { block0(v0: i32, v1: i64): @@ -513,11 +556,12 @@ block0(v0: i32, v1: i64): return v3 } -; check: llh %r3, 0(%r3) -; check: clr %r2, %r3 -; nextln: lhi %r2, 0 -; nextln: lochil %r2, 1 -; nextln: br %r14 +; block0: +; llh %r4, 0(%r3) +; clr %r2, %r4 +; lhi %r2, 0 +; lochil %r2, 1 +; br %r14 function %icmp_ult_i32_sym_ext16(i32) -> b1 { gv0 = symbol colocated %sym @@ -528,10 +572,11 @@ block0(v0: i32): return v3 } -; check: clhrl %r2, %sym + 0 -; nextln: lhi %r2, 0 -; nextln: lochil %r2, 1 -; nextln: br %r14 +; block0: +; clhrl %r2, %sym + 0 +; lhi %r2, 0 +; lochil %r2, 1 +; br %r14 function %icmp_ult_i16(i16, i16) -> b1 { block0(v0: i16, v1: i16): @@ -539,12 +584,14 @@ block0(v0: i16, v1: i16): return v2 } -; check: llhr %r2, %r2 -; nextln: llhr %r3, %r3 -; nextln: clr %r2, %r3 -; nextln: lhi %r2, 0 -; nextln: lochil %r2, 1 -; nextln: br %r14 +; block0: +; lgr %r5, %r3 +; llhr %r3, %r2 +; llhr %r5, %r5 +; clr %r3, %r5 +; lhi %r2, 0 +; lochil %r2, 1 +; br %r14 function %icmp_ult_i16_imm(i16) -> b1 { 
block0(v0: i16): @@ -553,11 +600,12 @@ block0(v0: i16): return v2 } -; check: llhr %r2, %r2 -; nextln: clfi %r2, 1 -; nextln: lhi %r2, 0 -; nextln: lochil %r2, 1 -; nextln: br %r14 +; block0: +; llhr %r5, %r2 +; clfi %r5, 1 +; lhi %r2, 0 +; lochil %r2, 1 +; br %r14 function %icmp_ult_i16_mem(i16, i64) -> b1 { block0(v0: i16, v1: i64): @@ -566,12 +614,14 @@ block0(v0: i16, v1: i64): return v3 } -; check: llhr %r2, %r2 -; nextln: llh %r3, 0(%r3) -; nextln: clr %r2, %r3 -; nextln: lhi %r2, 0 -; nextln: lochil %r2, 1 -; nextln: br %r14 +; block0: +; lgr %r5, %r3 +; llhr %r3, %r2 +; llh %r5, 0(%r5) +; clr %r3, %r5 +; lhi %r2, 0 +; lochil %r2, 1 +; br %r14 function %icmp_ult_i16_mem(i16) -> b1 { gv0 = symbol colocated %sym @@ -582,11 +632,12 @@ block0(v0: i16): return v3 } -; check: llhr %r2, %r2 -; nextln: clhrl %r2, %sym + 0 -; nextln: lhi %r2, 0 -; nextln: lochil %r2, 1 -; nextln: br %r14 +; block0: +; llhr %r5, %r2 +; clhrl %r5, %sym + 0 +; lhi %r2, 0 +; lochil %r2, 1 +; br %r14 function %icmp_ult_i8(i8, i8) -> b1 { block0(v0: i8, v1: i8): @@ -594,12 +645,14 @@ block0(v0: i8, v1: i8): return v2 } -; check: llcr %r2, %r2 -; nextln: llcr %r3, %r3 -; nextln: clr %r2, %r3 -; nextln: lhi %r2, 0 -; nextln: lochil %r2, 1 -; nextln: br %r14 +; block0: +; lgr %r5, %r3 +; llcr %r3, %r2 +; llcr %r5, %r5 +; clr %r3, %r5 +; lhi %r2, 0 +; lochil %r2, 1 +; br %r14 function %icmp_ult_i8_imm(i8) -> b1 { block0(v0: i8): @@ -608,11 +661,12 @@ block0(v0: i8): return v2 } -; check: llcr %r2, %r2 -; nextln: clfi %r2, 1 -; nextln: lhi %r2, 0 -; nextln: lochil %r2, 1 -; nextln: br %r14 +; block0: +; llcr %r5, %r2 +; clfi %r5, 1 +; lhi %r2, 0 +; lochil %r2, 1 +; br %r14 function %icmp_ult_i8_mem(i8, i64) -> b1 { block0(v0: i8, v1: i64): @@ -621,10 +675,12 @@ block0(v0: i8, v1: i64): return v3 } -; check: llcr %r2, %r2 -; nextln: llc %r3, 0(%r3) -; nextln: clr %r2, %r3 -; nextln: lhi %r2, 0 -; nextln: lochil %r2, 1 -; nextln: br %r14 +; block0: +; lgr %r5, %r3 +; llcr %r3, %r2 +; llc %r5, 0(%r5) +; clr %r3, %r5 +; lhi %r2, 0 +; lochil %r2, 1 +; br %r14 diff --git a/cranelift/filetests/filetests/isa/s390x/jumptable.clif b/cranelift/filetests/filetests/isa/s390x/jumptable.clif index 3517efcf53..69603266cf 100644 --- a/cranelift/filetests/filetests/isa/s390x/jumptable.clif +++ b/cranelift/filetests/filetests/isa/s390x/jumptable.clif @@ -1,4 +1,4 @@ -test compile +test compile precise-output target s390x function %f(i64) -> i64 { @@ -28,24 +28,32 @@ block5(v5: i64): return v6 } -; check: clgfi %r2, 3 -; nextln: jghe label1 -; nextln: sllg %r3, %r2, 2 -; nextln: larl %r1, 14 ; agf %r1, 0(%r1, %r3) ; br %r1 ; jt_entries label3 label5 label7 - -; check: Block 3 -; check: lghi %r3, 1 -; nextln: jg - -; check: Block 5 -; check: lghi %r3, 2 -; nextln: jg - -; check: Block 7 -; check: lghi %r3, 3 -; nextln: jg - -; check: agr %r2, %r3 -; nextln: br %r14 - +; block0: +; clgfi %r2, 3 +; jghe label1 +; sllg %r5, %r2, 2 +; larl %r1, 14 ; agf %r1, 0(%r1, %r5) ; br %r1 ; jt_entries label3 label5 label7 +; block1: +; lghi %r4, 4 +; jg label2 +; block2: +; jg label9 +; block3: +; lghi %r4, 1 +; jg label4 +; block4: +; jg label9 +; block5: +; lghi %r4, 2 +; jg label6 +; block6: +; jg label9 +; block7: +; lghi %r4, 3 +; jg label8 +; block8: +; jg label9 +; block9: +; agr %r2, %r4 +; br %r14 diff --git a/cranelift/filetests/filetests/isa/s390x/load-little.clif b/cranelift/filetests/filetests/isa/s390x/load-little.clif index 6561863d73..876e929f77 100644 --- a/cranelift/filetests/filetests/isa/s390x/load-little.clif +++ 
b/cranelift/filetests/filetests/isa/s390x/load-little.clif @@ -1,4 +1,4 @@ -test compile +test compile precise-output target s390x function %load_i64(i64) -> i64 { @@ -7,8 +7,9 @@ block0(v0: i64): return v1 } -; check: lrvg %r2, 0(%r2) -; nextln: br %r14 +; block0: +; lrvg %r2, 0(%r2) +; br %r14 function %load_i64_sym() -> i64 { gv0 = symbol colocated %sym @@ -18,8 +19,9 @@ block0: return v1 } -; check: larl %r1, %sym + 0 ; lrvg %r2, 0(%r1) -; nextln: br %r14 +; block0: +; larl %r1, %sym + 0 ; lrvg %r2, 0(%r1) +; br %r14 function %uload8_i64(i64) -> i64 { block0(v0: i64): @@ -27,8 +29,9 @@ block0(v0: i64): return v1 } -; check: llgc %r2, 0(%r2) -; nextln: br %r14 +; block0: +; llgc %r2, 0(%r2) +; br %r14 function %sload8_i64(i64) -> i64 { block0(v0: i64): @@ -36,8 +39,9 @@ block0(v0: i64): return v1 } -; check: lgb %r2, 0(%r2) -; nextln: br %r14 +; block0: +; lgb %r2, 0(%r2) +; br %r14 function %uload16_i64(i64) -> i64 { block0(v0: i64): @@ -45,9 +49,10 @@ block0(v0: i64): return v1 } -; check: lrvh %r2, 0(%r2) -; nextln: llghr %r2, %r2 -; nextln: br %r14 +; block0: +; lrvh %r5, 0(%r2) +; llghr %r2, %r5 +; br %r14 function %uload16_i64_sym() -> i64 { gv0 = symbol colocated %sym @@ -57,9 +62,10 @@ block0: return v1 } -; check: larl %r1, %sym + 0 ; lrvh %r2, 0(%r1) -; nextln: llghr %r2, %r2 -; nextln: br %r14 +; block0: +; larl %r1, %sym + 0 ; lrvh %r3, 0(%r1) +; llghr %r2, %r3 +; br %r14 function %sload16_i64(i64) -> i64 { block0(v0: i64): @@ -67,9 +73,10 @@ block0(v0: i64): return v1 } -; check: lrvh %r2, 0(%r2) -; nextln: lghr %r2, %r2 -; nextln: br %r14 +; block0: +; lrvh %r5, 0(%r2) +; lghr %r2, %r5 +; br %r14 function %sload16_i64_sym() -> i64 { gv0 = symbol colocated %sym @@ -79,9 +86,10 @@ block0: return v1 } -; check: larl %r1, %sym + 0 ; lrvh %r2, 0(%r1) -; nextln: lghr %r2, %r2 -; nextln: br %r14 +; block0: +; larl %r1, %sym + 0 ; lrvh %r3, 0(%r1) +; lghr %r2, %r3 +; br %r14 function %uload32_i64(i64) -> i64 { block0(v0: i64): @@ -89,9 +97,10 @@ block0(v0: i64): return v1 } -; check: lrv %r2, 0(%r2) -; nextln: llgfr %r2, %r2 -; nextln: br %r14 +; block0: +; lrv %r5, 0(%r2) +; llgfr %r2, %r5 +; br %r14 function %uload32_i64_sym() -> i64 { gv0 = symbol colocated %sym @@ -101,9 +110,10 @@ block0: return v1 } -; check: larl %r1, %sym + 0 ; lrv %r2, 0(%r1) -; nextln: llgfr %r2, %r2 -; nextln: br %r14 +; block0: +; larl %r1, %sym + 0 ; lrv %r3, 0(%r1) +; llgfr %r2, %r3 +; br %r14 function %sload32_i64(i64) -> i64 { block0(v0: i64): @@ -111,9 +121,10 @@ block0(v0: i64): return v1 } -; check: lrv %r2, 0(%r2) -; nextln: lgfr %r2, %r2 -; nextln: br %r14 +; block0: +; lrv %r5, 0(%r2) +; lgfr %r2, %r5 +; br %r14 function %sload32_i64_sym() -> i64 { gv0 = symbol colocated %sym @@ -123,9 +134,10 @@ block0: return v1 } -; check: larl %r1, %sym + 0 ; lrv %r2, 0(%r1) -; nextln: lgfr %r2, %r2 -; nextln: br %r14 +; block0: +; larl %r1, %sym + 0 ; lrv %r3, 0(%r1) +; lgfr %r2, %r3 +; br %r14 function %load_i32(i64) -> i32 { block0(v0: i64): @@ -133,8 +145,9 @@ block0(v0: i64): return v1 } -; check: lrv %r2, 0(%r2) -; nextln: br %r14 +; block0: +; lrv %r2, 0(%r2) +; br %r14 function %load_i32_sym() -> i32 { gv0 = symbol colocated %sym @@ -144,8 +157,9 @@ block0: return v1 } -; check: larl %r1, %sym + 0 ; lrv %r2, 0(%r1) -; nextln: br %r14 +; block0: +; larl %r1, %sym + 0 ; lrv %r2, 0(%r1) +; br %r14 function %uload8_i32(i64) -> i32 { block0(v0: i64): @@ -153,8 +167,9 @@ block0(v0: i64): return v1 } -; check: llc %r2, 0(%r2) -; nextln: br %r14 +; block0: +; llc %r2, 0(%r2) +; br %r14 function 
%sload8_i32(i64) -> i32 { block0(v0: i64): @@ -162,8 +177,9 @@ block0(v0: i64): return v1 } -; check: lb %r2, 0(%r2) -; nextln: br %r14 +; block0: +; lb %r2, 0(%r2) +; br %r14 function %uload16_i32(i64) -> i32 { block0(v0: i64): @@ -171,9 +187,10 @@ block0(v0: i64): return v1 } -; check: lrvh %r2, 0(%r2) -; nextln: llhr %r2, %r2 -; nextln: br %r14 +; block0: +; lrvh %r5, 0(%r2) +; llhr %r2, %r5 +; br %r14 function %uload16_i32_sym() -> i32 { gv0 = symbol colocated %sym @@ -183,9 +200,10 @@ block0: return v1 } -; check: larl %r1, %sym + 0 ; lrvh %r2, 0(%r1) -; nextln: llhr %r2, %r2 -; nextln: br %r14 +; block0: +; larl %r1, %sym + 0 ; lrvh %r3, 0(%r1) +; llhr %r2, %r3 +; br %r14 function %sload16_i32(i64) -> i32 { block0(v0: i64): @@ -193,9 +211,10 @@ block0(v0: i64): return v1 } -; check: lrvh %r2, 0(%r2) -; nextln: lhr %r2, %r2 -; nextln: br %r14 +; block0: +; lrvh %r5, 0(%r2) +; lhr %r2, %r5 +; br %r14 function %sload16_i32_sym() -> i32 { gv0 = symbol colocated %sym @@ -205,9 +224,10 @@ block0: return v1 } -; check: larl %r1, %sym + 0 ; lrvh %r2, 0(%r1) -; nextln: lhr %r2, %r2 -; nextln: br %r14 +; block0: +; larl %r1, %sym + 0 ; lrvh %r3, 0(%r1) +; lhr %r2, %r3 +; br %r14 function %load_i16(i64) -> i16 { block0(v0: i64): @@ -215,8 +235,9 @@ block0(v0: i64): return v1 } -; check: lrvh %r2, 0(%r2) -; nextln: br %r14 +; block0: +; lrvh %r2, 0(%r2) +; br %r14 function %load_i16_sym() -> i16 { gv0 = symbol colocated %sym @@ -226,8 +247,9 @@ block0: return v1 } -; check: larl %r1, %sym + 0 ; lrvh %r2, 0(%r1) -; nextln: br %r14 +; block0: +; larl %r1, %sym + 0 ; lrvh %r2, 0(%r1) +; br %r14 function %uload8_i16(i64) -> i16 { block0(v0: i64): @@ -235,8 +257,9 @@ block0(v0: i64): return v1 } -; check: llc %r2, 0(%r2) -; nextln: br %r14 +; block0: +; llc %r2, 0(%r2) +; br %r14 function %sload8_i16(i64) -> i16 { block0(v0: i64): @@ -244,8 +267,9 @@ block0(v0: i64): return v1 } -; check: lb %r2, 0(%r2) -; nextln: br %r14 +; block0: +; lb %r2, 0(%r2) +; br %r14 function %load_i8(i64) -> i8 { block0(v0: i64): @@ -253,6 +277,7 @@ block0(v0: i64): return v1 } -; check: llc %r2, 0(%r2) -; nextln: br %r14 +; block0: +; llc %r2, 0(%r2) +; br %r14 diff --git a/cranelift/filetests/filetests/isa/s390x/load.clif b/cranelift/filetests/filetests/isa/s390x/load.clif index 8d46fe0867..1d0a4a10c7 100644 --- a/cranelift/filetests/filetests/isa/s390x/load.clif +++ b/cranelift/filetests/filetests/isa/s390x/load.clif @@ -1,4 +1,4 @@ -test compile +test compile precise-output target s390x function %load_i64(i64) -> i64 { @@ -7,8 +7,9 @@ block0(v0: i64): return v1 } -; check: lg %r2, 0(%r2) -; nextln: br %r14 +; block0: +; lg %r2, 0(%r2) +; br %r14 function %load_i64_sym() -> i64 { gv0 = symbol colocated %sym @@ -18,8 +19,9 @@ block0: return v1 } -; check: lgrl %r2, %sym + 0 -; nextln: br %r14 +; block0: +; lgrl %r2, %sym + 0 +; br %r14 function %uload8_i64(i64) -> i64 { block0(v0: i64): @@ -27,8 +29,9 @@ block0(v0: i64): return v1 } -; check: llgc %r2, 0(%r2) -; nextln: br %r14 +; block0: +; llgc %r2, 0(%r2) +; br %r14 function %sload8_i64(i64) -> i64 { block0(v0: i64): @@ -36,8 +39,9 @@ block0(v0: i64): return v1 } -; check: lgb %r2, 0(%r2) -; nextln: br %r14 +; block0: +; lgb %r2, 0(%r2) +; br %r14 function %uload16_i64(i64) -> i64 { block0(v0: i64): @@ -45,8 +49,9 @@ block0(v0: i64): return v1 } -; check: llgh %r2, 0(%r2) -; nextln: br %r14 +; block0: +; llgh %r2, 0(%r2) +; br %r14 function %uload16_i64_sym() -> i64 { gv0 = symbol colocated %sym @@ -56,8 +61,9 @@ block0: return v1 } -; check: llghrl %r2, %sym + 0 -; 
nextln: br %r14 +; block0: +; llghrl %r2, %sym + 0 +; br %r14 function %sload16_i64(i64) -> i64 { block0(v0: i64): @@ -65,8 +71,9 @@ block0(v0: i64): return v1 } -; check: lgh %r2, 0(%r2) -; nextln: br %r14 +; block0: +; lgh %r2, 0(%r2) +; br %r14 function %sload16_i64_sym() -> i64 { gv0 = symbol colocated %sym @@ -76,8 +83,9 @@ block0: return v1 } -; check: lghrl %r2, %sym + 0 -; nextln: br %r14 +; block0: +; lghrl %r2, %sym + 0 +; br %r14 function %uload32_i64(i64) -> i64 { block0(v0: i64): @@ -85,8 +93,9 @@ block0(v0: i64): return v1 } -; check: llgf %r2, 0(%r2) -; nextln: br %r14 +; block0: +; llgf %r2, 0(%r2) +; br %r14 function %uload32_i64_sym() -> i64 { gv0 = symbol colocated %sym @@ -96,8 +105,9 @@ block0: return v1 } -; check: llgfrl %r2, %sym + 0 -; nextln: br %r14 +; block0: +; llgfrl %r2, %sym + 0 +; br %r14 function %sload32_i64(i64) -> i64 { block0(v0: i64): @@ -105,8 +115,9 @@ block0(v0: i64): return v1 } -; check: lgf %r2, 0(%r2) -; nextln: br %r14 +; block0: +; lgf %r2, 0(%r2) +; br %r14 function %sload32_i64_sym() -> i64 { gv0 = symbol colocated %sym @@ -116,8 +127,9 @@ block0: return v1 } -; check: lgfrl %r2, %sym + 0 -; nextln: br %r14 +; block0: +; lgfrl %r2, %sym + 0 +; br %r14 function %load_i32(i64) -> i32 { block0(v0: i64): @@ -125,8 +137,9 @@ block0(v0: i64): return v1 } -; check: l %r2, 0(%r2) -; nextln: br %r14 +; block0: +; l %r2, 0(%r2) +; br %r14 function %load_i32_sym() -> i32 { gv0 = symbol colocated %sym @@ -136,8 +149,9 @@ block0: return v1 } -; check: lrl %r2, %sym + 0 -; nextln: br %r14 +; block0: +; lrl %r2, %sym + 0 +; br %r14 function %load_i32_off(i64) -> i32 { block0(v0: i64): @@ -145,8 +159,9 @@ block0(v0: i64): return v1 } -; check: ly %r2, 4096(%r2) -; nextln: br %r14 +; block0: +; ly %r2, 4096(%r2) +; br %r14 function %uload8_i32(i64) -> i32 { block0(v0: i64): @@ -154,8 +169,9 @@ block0(v0: i64): return v1 } -; check: llc %r2, 0(%r2) -; nextln: br %r14 +; block0: +; llc %r2, 0(%r2) +; br %r14 function %sload8_i32(i64) -> i32 { block0(v0: i64): @@ -163,8 +179,9 @@ block0(v0: i64): return v1 } -; check: lb %r2, 0(%r2) -; nextln: br %r14 +; block0: +; lb %r2, 0(%r2) +; br %r14 function %uload16_i32(i64) -> i32 { block0(v0: i64): @@ -172,8 +189,9 @@ block0(v0: i64): return v1 } -; check: llh %r2, 0(%r2) -; nextln: br %r14 +; block0: +; llh %r2, 0(%r2) +; br %r14 function %uload16_i32_sym() -> i32 { gv0 = symbol colocated %sym @@ -183,8 +201,9 @@ block0: return v1 } -; check: llhrl %r2, %sym + 0 -; nextln: br %r14 +; block0: +; llhrl %r2, %sym + 0 +; br %r14 function %sload16_i32(i64) -> i32 { block0(v0: i64): @@ -192,8 +211,9 @@ block0(v0: i64): return v1 } -; check: lh %r2, 0(%r2) -; nextln: br %r14 +; block0: +; lh %r2, 0(%r2) +; br %r14 function %sload16_i32_off(i64) -> i32 { block0(v0: i64): @@ -201,8 +221,9 @@ block0(v0: i64): return v1 } -; check: lhy %r2, 4096(%r2) -; nextln: br %r14 +; block0: +; lhy %r2, 4096(%r2) +; br %r14 function %sload16_i32_sym() -> i32 { gv0 = symbol colocated %sym @@ -212,8 +233,9 @@ block0: return v1 } -; check: lhrl %r2, %sym + 0 -; nextln: br %r14 +; block0: +; lhrl %r2, %sym + 0 +; br %r14 function %load_i16(i64) -> i16 { block0(v0: i64): @@ -221,8 +243,9 @@ block0(v0: i64): return v1 } -; check: llh %r2, 0(%r2) -; nextln: br %r14 +; block0: +; llh %r2, 0(%r2) +; br %r14 function %load_i16_sym() -> i16 { gv0 = symbol colocated %sym @@ -232,8 +255,9 @@ block0: return v1 } -; check: llhrl %r2, %sym + 0 -; nextln: br %r14 +; block0: +; llhrl %r2, %sym + 0 +; br %r14 function %uload8_i16(i64) -> i16 { block0(v0: 
i64): @@ -241,8 +265,9 @@ block0(v0: i64): return v1 } -; check: llc %r2, 0(%r2) -; nextln: br %r14 +; block0: +; llc %r2, 0(%r2) +; br %r14 function %sload8_i16(i64) -> i16 { block0(v0: i64): @@ -250,8 +275,9 @@ block0(v0: i64): return v1 } -; check: lb %r2, 0(%r2) -; nextln: br %r14 +; block0: +; lb %r2, 0(%r2) +; br %r14 function %load_i8(i64) -> i8 { block0(v0: i64): @@ -259,6 +285,7 @@ block0(v0: i64): return v1 } -; check: llc %r2, 0(%r2) -; nextln: br %r14 +; block0: +; llc %r2, 0(%r2) +; br %r14 diff --git a/cranelift/filetests/filetests/isa/s390x/multivalue-ret.clif b/cranelift/filetests/filetests/isa/s390x/multivalue-ret.clif index d9197d3072..896f9a49d2 100644 --- a/cranelift/filetests/filetests/isa/s390x/multivalue-ret.clif +++ b/cranelift/filetests/filetests/isa/s390x/multivalue-ret.clif @@ -1,4 +1,4 @@ -test compile +test compile precise-output target s390x ;; Test default (non-SpiderMonkey) ABI. @@ -11,11 +11,12 @@ block1: return v0, v1, v2, v3 } -; check: lghi %r2, 1 -; nextln: lghi %r3, 2 -; nextln: lghi %r4, 3 -; nextln: lghi %r5, 4 -; nextln: br %r14 +; block0: +; lghi %r2, 1 +; lghi %r3, 2 +; lghi %r4, 3 +; lghi %r5, 4 +; br %r14 function %f1() -> i64, i64, i64, i64, i64, i64 { block1: @@ -28,20 +29,20 @@ block1: return v0, v1, v2, v3, v4, v5 } -; check: stmg %r12, %r15, 96(%r15) -; nextln: lgr %r14, %r2 -; nextln: lghi %r2, 1 -; nextln: lghi %r3, 2 -; nextln: lghi %r4, 3 -; nextln: lghi %r5, 4 -; nextln: lghi %r13, 5 -; nextln: lghi %r12, 6 -; nextln: stg %r13, 0(%r14) -; nextln: stg %r12, 8(%r14) -; nextln: lmg %r12, %r15, 96(%r15) -; nextln: br %r14 +; stmg %r6, %r15, 48(%r15) +; block0: +; lgr %r12, %r2 +; lghi %r2, 1 +; lghi %r3, 2 +; lghi %r4, 3 +; lghi %r5, 4 +; lghi %r10, 5 +; lghi %r6, 6 +; stg %r10, 0(%r12) +; stg %r6, 8(%r12) +; lmg %r6, %r15, 48(%r15) +; br %r14 -;; Test default (non-SpiderMonkey) ABI. 
function %f3() -> f64, f64, f64, f64 { block1: v0 = f64const 0x0.0 @@ -51,11 +52,12 @@ block1: return v0, v1, v2, v3 } -; check: bras %r1, 12 ; data.f64 0 ; ld %f0, 0(%r1) -; nextln: bras %r1, 12 ; data.f64 1 ; ld %f2, 0(%r1) -; nextln: bras %r1, 12 ; data.f64 2 ; ld %f4, 0(%r1) -; nextln: bras %r1, 12 ; data.f64 3 ; ld %f6, 0(%r1) -; nextln: br %r14 +; block0: +; bras %r1, 12 ; data.f64 0 ; ld %f0, 0(%r1) +; bras %r1, 12 ; data.f64 1 ; ld %f2, 0(%r1) +; bras %r1, 12 ; data.f64 2 ; ld %f4, 0(%r1) +; bras %r1, 12 ; data.f64 3 ; ld %f6, 0(%r1) +; br %r14 function %f4() -> f64, f64, f64, f64, f64, f64 { block1: @@ -68,12 +70,14 @@ block1: return v0, v1, v2, v3, v4, v5 } -; check: bras %r1, 12 ; data.f64 0 ; ld %f0, 0(%r1) -; nextln: bras %r1, 12 ; data.f64 1 ; ld %f2, 0(%r1) -; nextln: bras %r1, 12 ; data.f64 2 ; ld %f4, 0(%r1) -; nextln: bras %r1, 12 ; data.f64 3 ; ld %f6, 0(%r1) -; nextln: bras %r1, 12 ; data.f64 4 ; ld %f1, 0(%r1) -; nextln: bras %r1, 12 ; data.f64 5 ; ld %f3, 0(%r1) -; nextln: std %f1, 0(%r2) -; nextln: std %f3, 8(%r2) -; nextln: br %r14 +; block0: +; bras %r1, 12 ; data.f64 0 ; ld %f0, 0(%r1) +; bras %r1, 12 ; data.f64 1 ; ld %f2, 0(%r1) +; bras %r1, 12 ; data.f64 2 ; ld %f4, 0(%r1) +; bras %r1, 12 ; data.f64 3 ; ld %f6, 0(%r1) +; bras %r1, 12 ; data.f64 4 ; ld %f5, 0(%r1) +; bras %r1, 12 ; data.f64 5 ; ld %f7, 0(%r1) +; std %f5, 0(%r2) +; std %f7, 8(%r2) +; br %r14 + diff --git a/cranelift/filetests/filetests/isa/s390x/reftypes.clif b/cranelift/filetests/filetests/isa/s390x/reftypes.clif index 0ffdcab20e..adb8c53f0c 100644 --- a/cranelift/filetests/filetests/isa/s390x/reftypes.clif +++ b/cranelift/filetests/filetests/isa/s390x/reftypes.clif @@ -1,4 +1,4 @@ -test compile +test compile precise-output target s390x function %f0(r64, r64) -> r64 { @@ -6,8 +6,9 @@ block0(v0: r64, v1: r64): return v1 } -; check: lgr %r2, %r3 -; nextln: br %r14 +; block0: +; lgr %r2, %r3 +; br %r14 function %f1(r64) -> b1 { block0(v0: r64): @@ -15,10 +16,11 @@ block0(v0: r64): return v1 } -; check: cghi %r2, 0 -; nextln: lhi %r2, 0 -; nextln: lochie %r2, 1 -; nextln: br %r14 +; block0: +; cghi %r2, 0 +; lhi %r2, 0 +; lochie %r2, 1 +; br %r14 function %f2(r64) -> b1 { block0(v0: r64): @@ -26,10 +28,11 @@ block0(v0: r64): return v1 } -; check: cghi %r2, -1 -; nextln: lhi %r2, 0 -; nextln: lochie %r2, 1 -; nextln: br %r14 +; block0: +; cghi %r2, -1 +; lhi %r2, 0 +; lochie %r2, 1 +; br %r14 function %f3() -> r64 { block0: @@ -37,8 +40,9 @@ block0: return v0 } -; check: lghi %r2, 0 -; nextln: br %r14 +; block0: +; lghi %r2, 0 +; br %r14 function %f4(r64, r64) -> r64, r64, r64 { fn0 = %f(r64) -> b1 @@ -61,41 +65,36 @@ block3(v7: r64, v8: r64): return v7, v8, v9 } -; check: Block 0: -; check: stmg %r12, %r15, 96(%r15) -; nextln: aghi %r15, -192 -; nextln: virtual_sp_offset_adjust 160 -; nextln: lgr %r13, %r2 -; nextln: lgr %r12, %r3 -; nextln: lgr %r2, %r13 -; nextln: bras %r1, 12 ; data %f + 0 ; lg %r3, 0(%r1) -; nextln: stg %r2, 168(%r15) -; nextln: stg %r13, 176(%r15) -; nextln: stg %r12, 184(%r15) -; nextln: (safepoint: slots [S0, S1, S2] -; nextln: basr %r14, %r3 -; nextln: lg %r13, 176(%r15) -; nextln: lg %r12, 184(%r15) -; nextln: la %r3, 160(%r15) -; nextln: stg %r13, 0(%r3) -; nextln: llcr %r2, %r2 -; nextln: chi %r2, 0 -; nextln: jgnlh label1 ; jg label3 -; check: Block 1: -; check: jg label2 -; check: Block 2: -; check: lgr %r2, %r12 -; nextln: jg label5 -; check: Block 3: -; check: jg label4 -; check: Block 4: -; check: lgr %r2, %r13 -; nextln: lgr %r13, %r12 -; nextln: jg label5 -; check: 
Block 5: -; check: la %r3, 160(%r15) -; nextln: lg %r3, 0(%r3) -; nextln: lgr %r4, %r3 -; nextln: lgr %r3, %r13 -; nextln: lmg %r12, %r15, 288(%r15) -; nextln: br %r14 +; stmg %r14, %r15, 112(%r15) +; aghi %r15, -184 +; virtual_sp_offset_adjust 160 +; block0: +; lgr %r4, %r3 +; lgr %r3, %r2 +; bras %r1, 12 ; data %f + 0 ; lg %r5, 0(%r1) +; stg %r3, 168(%r15) +; stg %r4, 176(%r15) +; basr %r14, %r5 +; lg %r3, 168(%r15) +; la %r4, 160(%r15) +; stg %r3, 0(%r4) +; llcr %r4, %r2 +; chi %r4, 0 +; jgnlh label1 ; jg label3 +; block1: +; jg label2 +; block2: +; lg %r2, 176(%r15) +; jg label5 +; block3: +; jg label4 +; block4: +; lgr %r2, %r3 +; lg %r3, 176(%r15) +; jg label5 +; block5: +; la %r4, 160(%r15) +; lg %r4, 0(%r4) +; lmg %r14, %r15, 296(%r15) +; br %r14 + diff --git a/cranelift/filetests/filetests/isa/s390x/saturating-ops.clif b/cranelift/filetests/filetests/isa/s390x/saturating-ops.clif index 193a02eaad..21c328e4ce 100644 --- a/cranelift/filetests/filetests/isa/s390x/saturating-ops.clif +++ b/cranelift/filetests/filetests/isa/s390x/saturating-ops.clif @@ -1,12 +1,16 @@ -test compile +test compile precise-output target s390x ; FIXME: not yet supported function %uaddsat64(i64, i64) -> i64 { block0(v0: i64, v1: i64): -; v2 = uadd_sat.i64 v0, v1 +;;SKIP v2 = uadd_sat.i64 v0, v1 v2 = iconst.i64 0 return v2 } +; block0: +; lghi %r2, 0 +; br %r14 + diff --git a/cranelift/filetests/filetests/isa/s390x/shift-rotate.clif b/cranelift/filetests/filetests/isa/s390x/shift-rotate.clif index 0066bef498..552dfa62e8 100644 --- a/cranelift/filetests/filetests/isa/s390x/shift-rotate.clif +++ b/cranelift/filetests/filetests/isa/s390x/shift-rotate.clif @@ -1,4 +1,4 @@ -test compile +test compile precise-output target s390x ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; @@ -11,9 +11,10 @@ block0(v0: i64, v1: i64): return v2 } -; check: lcgr %r3, %r3 -; nextln: rllg %r2, %r2, 0(%r3) -; nextln: br %r14 +; block0: +; lcgr %r3, %r3 +; rllg %r2, %r2, 0(%r3) +; br %r14 function %rotr_i64_imm(i64) -> i64 { block0(v0: i64): @@ -22,8 +23,9 @@ block0(v0: i64): return v2 } -; check: rllg %r2, %r2, 47 -; nextln: br %r14 +; block0: +; rllg %r2, %r2, 47 +; br %r14 function %rotr_i32_reg(i32, i32) -> i32 { block0(v0: i32, v1: i32): @@ -31,9 +33,10 @@ block0(v0: i32, v1: i32): return v2 } -; check: lcr %r3, %r3 -; nextln: rll %r2, %r2, 0(%r3) -; nextln: br %r14 +; block0: +; lcr %r3, %r3 +; rll %r2, %r2, 0(%r3) +; br %r14 function %rotr_i32_imm(i32) -> i32 { block0(v0: i32): @@ -42,8 +45,9 @@ block0(v0: i32): return v2 } -; check: rll %r2, %r2, 15 -; nextln: br %r14 +; block0: +; rll %r2, %r2, 15 +; br %r14 function %rotr_i16_reg(i16, i16) -> i16 { block0(v0: i16, v1: i16): @@ -51,14 +55,15 @@ block0(v0: i16, v1: i16): return v2 } -; check: llhr %r2, %r2 -; nextln: lcr %r4, %r3 -; nextln: nill %r3, 15 -; nextln: nill %r4, 15 -; nextln: sllk %r4, %r2, 0(%r4) -; nextln: srlk %r2, %r2, 0(%r3) -; nextln: ork %r2, %r4, %r2 -; nextln: br %r14 +; block0: +; llhr %r4, %r2 +; lcr %r5, %r3 +; nill %r3, 15 +; nill %r5, 15 +; sllk %r5, %r4, 0(%r5) +; srlk %r3, %r4, 0(%r3) +; ork %r2, %r5, %r3 +; br %r14 function %rotr_i16_imm(i16) -> i16 { block0(v0: i16): @@ -67,11 +72,12 @@ block0(v0: i16): return v2 } -; check: llhr %r2, %r2 -; nextln: sllk %r3, %r2, 6 -; nextln: srlk %r2, %r2, 10 -; nextln: ork %r2, %r3, %r2 -; nextln: br %r14 +; block0: +; llhr %r5, %r2 +; sllk %r3, %r5, 6 +; srlk %r5, %r5, 10 +; ork %r2, %r3, %r5 +; br %r14 function %rotr_i8_reg(i8, i8) -> i8 { block0(v0: i8, v1: i8): @@ -79,14 +85,15 @@ block0(v0: i8, v1: i8): return 
v2 } -; check: llcr %r2, %r2 -; nextln: lcr %r4, %r3 -; nextln: nill %r3, 7 -; nextln: nill %r4, 7 -; nextln: sllk %r4, %r2, 0(%r4) -; nextln: srlk %r2, %r2, 0(%r3) -; nextln: ork %r2, %r4, %r2 -; nextln: br %r14 +; block0: +; llcr %r4, %r2 +; lcr %r5, %r3 +; nill %r3, 7 +; nill %r5, 7 +; sllk %r5, %r4, 0(%r5) +; srlk %r3, %r4, 0(%r3) +; ork %r2, %r5, %r3 +; br %r14 function %rotr_i8_imm(i8) -> i8 { block0(v0: i8): @@ -95,15 +102,12 @@ block0(v0: i8): return v2 } -; check: llcr %r2, %r2 -; nextln: sllk %r3, %r2, 5 -; nextln: srlk %r2, %r2, 3 -; nextln: ork %r2, %r3, %r2 -; nextln: br %r14 - -;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; -;; ROTL -;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +; block0: +; llcr %r5, %r2 +; sllk %r3, %r5, 5 +; srlk %r5, %r5, 3 +; ork %r2, %r3, %r5 +; br %r14 function %rotl_i64_reg(i64, i64) -> i64 { block0(v0: i64, v1: i64): @@ -111,8 +115,9 @@ block0(v0: i64, v1: i64): return v2 } -; check: rllg %r2, %r2, 0(%r3) -; nextln: br %r14 +; block0: +; rllg %r2, %r2, 0(%r3) +; br %r14 function %rotl_i64_imm(i64) -> i64 { block0(v0: i64): @@ -121,8 +126,9 @@ block0(v0: i64): return v2 } -; check: rllg %r2, %r2, 17 -; nextln: br %r14 +; block0: +; rllg %r2, %r2, 17 +; br %r14 function %rotl_i32_reg(i32, i32) -> i32 { block0(v0: i32, v1: i32): @@ -130,8 +136,9 @@ block0(v0: i32, v1: i32): return v2 } -; check: rll %r2, %r2, 0(%r3) -; nextln: br %r14 +; block0: +; rll %r2, %r2, 0(%r3) +; br %r14 function %rotl_i32_imm(i32) -> i32 { block0(v0: i32): @@ -140,8 +147,9 @@ block0(v0: i32): return v2 } -; check: rll %r2, %r2, 17 -; nextln: br %r14 +; block0: +; rll %r2, %r2, 17 +; br %r14 function %rotl_i16_reg(i16, i16) -> i16 { block0(v0: i16, v1: i16): @@ -149,14 +157,15 @@ block0(v0: i16, v1: i16): return v2 } -; check: llhr %r2, %r2 -; nextln: lcr %r4, %r3 -; nextln: nill %r3, 15 -; nextln: nill %r4, 15 -; nextln: sllk %r3, %r2, 0(%r3) -; nextln: srlk %r2, %r2, 0(%r4) -; nextln: ork %r2, %r3, %r2 -; nextln: br %r14 +; block0: +; llhr %r4, %r2 +; lcr %r5, %r3 +; nill %r3, 15 +; nill %r5, 15 +; sllk %r2, %r4, 0(%r3) +; srlk %r3, %r4, 0(%r5) +; or %r2, %r3 +; br %r14 function %rotl_i16_imm(i16) -> i16 { block0(v0: i16): @@ -165,11 +174,12 @@ block0(v0: i16): return v2 } -; check: llhr %r2, %r2 -; nextln: sllk %r3, %r2, 10 -; nextln: srlk %r2, %r2, 6 -; nextln: ork %r2, %r3, %r2 -; nextln: br %r14 +; block0: +; llhr %r5, %r2 +; sllk %r3, %r5, 10 +; srlk %r5, %r5, 6 +; ork %r2, %r3, %r5 +; br %r14 function %rotl_i8_reg(i8, i8) -> i8 { block0(v0: i8, v1: i8): @@ -177,14 +187,15 @@ block0(v0: i8, v1: i8): return v2 } -; check: llcr %r2, %r2 -; nextln: lcr %r4, %r3 -; nextln: nill %r3, 7 -; nextln: nill %r4, 7 -; nextln: sllk %r3, %r2, 0(%r3) -; nextln: srlk %r2, %r2, 0(%r4) -; nextln: ork %r2, %r3, %r2 -; nextln: br %r14 +; block0: +; llcr %r4, %r2 +; lcr %r5, %r3 +; nill %r3, 7 +; nill %r5, 7 +; sllk %r2, %r4, 0(%r3) +; srlk %r3, %r4, 0(%r5) +; or %r2, %r3 +; br %r14 function %rotr_i8_imm(i8) -> i8 { block0(v0: i8): @@ -193,15 +204,12 @@ block0(v0: i8): return v2 } -; check: llcr %r2, %r2 -; nextln: sllk %r3, %r2, 3 -; nextln: srlk %r2, %r2, 5 -; nextln: ork %r2, %r3, %r2 -; nextln: br %r14 - -;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; -;; USHR -;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +; block0: +; llcr %r5, %r2 +; sllk %r3, %r5, 3 +; srlk %r5, %r5, 5 +; ork %r2, %r3, %r5 +; br %r14 function %ushr_i64_reg(i64, i64) -> i64 { block0(v0: i64, v1: i64): @@ -209,8 +217,9 @@ block0(v0: i64, v1: i64): return v2 } -; check: srlg %r2, %r2, 0(%r3) -; nextln: br %r14 +; block0: +; srlg 
%r2, %r2, 0(%r3) +; br %r14 function %ushr_i64_imm(i64) -> i64 { block0(v0: i64): @@ -219,8 +228,9 @@ block0(v0: i64): return v2 } -; check: srlg %r2, %r2, 17 -; nextln: br %r14 +; block0: +; srlg %r2, %r2, 17 +; br %r14 function %ushr_i32_reg(i32, i32) -> i32 { block0(v0: i32, v1: i32): @@ -228,8 +238,10 @@ block0(v0: i32, v1: i32): return v2 } -; check: srlk %r2, %r2, 0(%r3) -; nextln: br %r14 +; block0: +; nill %r3, 31 +; srlk %r2, %r2, 0(%r3) +; br %r14 function %ushr_i32_imm(i32) -> i32 { block0(v0: i32): @@ -238,8 +250,9 @@ block0(v0: i32): return v2 } -; check: srlk %r2, %r2, 17 -; nextln: br %r14 +; block0: +; srlk %r2, %r2, 17 +; br %r14 function %ushr_i16_reg(i16, i16) -> i16 { block0(v0: i16, v1: i16): @@ -247,10 +260,11 @@ block0(v0: i16, v1: i16): return v2 } -; check: llhr %r2, %r2 -; nextln: nill %r3, 15 -; nextln: srlk %r2, %r2, 0(%r3) -; nextln: br %r14 +; block0: +; llhr %r4, %r2 +; nill %r3, 15 +; srlk %r2, %r4, 0(%r3) +; br %r14 function %ushr_i16_imm(i16) -> i16 { block0(v0: i16): @@ -259,9 +273,10 @@ block0(v0: i16): return v2 } -; check: llhr %r2, %r2 -; nextln: srlk %r2, %r2, 10 -; nextln: br %r14 +; block0: +; llhr %r5, %r2 +; srlk %r2, %r5, 10 +; br %r14 function %ushr_i8_reg(i8, i8) -> i8 { block0(v0: i8, v1: i8): @@ -269,10 +284,11 @@ block0(v0: i8, v1: i8): return v2 } -; check: llcr %r2, %r2 -; nextln: nill %r3, 7 -; nextln: srlk %r2, %r2, 0(%r3) -; nextln: br %r14 +; block0: +; llcr %r4, %r2 +; nill %r3, 7 +; srlk %r2, %r4, 0(%r3) +; br %r14 function %ushr_i8_imm(i8) -> i8 { block0(v0: i8): @@ -281,13 +297,10 @@ block0(v0: i8): return v2 } -; check: llcr %r2, %r2 -; nextln: srlk %r2, %r2, 3 -; nextln: br %r14 - -;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; -;; ISHL -;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +; block0: +; llcr %r5, %r2 +; srlk %r2, %r5, 3 +; br %r14 function %ishl_i64_reg(i64, i64) -> i64 { block0(v0: i64, v1: i64): @@ -295,8 +308,9 @@ block0(v0: i64, v1: i64): return v2 } -; check: sllg %r2, %r2, 0(%r3) -; nextln: br %r14 +; block0: +; sllg %r2, %r2, 0(%r3) +; br %r14 function %ishl_i64_imm(i64) -> i64 { block0(v0: i64): @@ -305,8 +319,9 @@ block0(v0: i64): return v2 } -; check: sllg %r2, %r2, 17 -; nextln: br %r14 +; block0: +; sllg %r2, %r2, 17 +; br %r14 function %ishl_i32_reg(i32, i32) -> i32 { block0(v0: i32, v1: i32): @@ -314,8 +329,10 @@ block0(v0: i32, v1: i32): return v2 } -; check: sllk %r2, %r2, 0(%r3) -; nextln: br %r14 +; block0: +; nill %r3, 31 +; sllk %r2, %r2, 0(%r3) +; br %r14 function %ishl_i32_imm(i32) -> i32 { block0(v0: i32): @@ -324,8 +341,9 @@ block0(v0: i32): return v2 } -; check: sllk %r2, %r2, 17 -; nextln: br %r14 +; block0: +; sllk %r2, %r2, 17 +; br %r14 function %ishl_i16_reg(i16, i16) -> i16 { block0(v0: i16, v1: i16): @@ -333,9 +351,10 @@ block0(v0: i16, v1: i16): return v2 } -; check: nill %r3, 15 -; nextln: sllk %r2, %r2, 0(%r3) -; nextln: br %r14 +; block0: +; nill %r3, 15 +; sllk %r2, %r2, 0(%r3) +; br %r14 function %ishl_i16_imm(i16) -> i16 { block0(v0: i16): @@ -344,8 +363,9 @@ block0(v0: i16): return v2 } -; check: sllk %r2, %r2, 10 -; nextln: br %r14 +; block0: +; sllk %r2, %r2, 10 +; br %r14 function %ishl_i8_reg(i8, i8) -> i8 { block0(v0: i8, v1: i8): @@ -353,9 +373,10 @@ block0(v0: i8, v1: i8): return v2 } -; check: nill %r3, 7 -; nextln: sllk %r2, %r2, 0(%r3) -; nextln: br %r14 +; block0: +; nill %r3, 7 +; sllk %r2, %r2, 0(%r3) +; br %r14 function %ishl_i8_imm(i8) -> i8 { block0(v0: i8): @@ -364,12 +385,9 @@ block0(v0: i8): return v2 } -; check: sllk %r2, %r2, 3 -; nextln: br %r14 - 
-;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; -;; SSHR -;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +; block0: +; sllk %r2, %r2, 3 +; br %r14 function %sshr_i64_reg(i64, i64) -> i64 { block0(v0: i64, v1: i64): @@ -377,8 +395,9 @@ block0(v0: i64, v1: i64): return v2 } -; check: srag %r2, %r2, 0(%r3) -; nextln: br %r14 +; block0: +; srag %r2, %r2, 0(%r3) +; br %r14 function %sshr_i64_imm(i64) -> i64 { block0(v0: i64): @@ -387,8 +406,9 @@ block0(v0: i64): return v2 } -; check: srag %r2, %r2, 17 -; nextln: br %r14 +; block0: +; srag %r2, %r2, 17 +; br %r14 function %sshr_i32_reg(i32, i32) -> i32 { block0(v0: i32, v1: i32): @@ -396,8 +416,10 @@ block0(v0: i32, v1: i32): return v2 } -; check: srak %r2, %r2, 0(%r3) -; nextln: br %r14 +; block0: +; nill %r3, 31 +; srak %r2, %r2, 0(%r3) +; br %r14 function %sshr_i32_imm(i32) -> i32 { block0(v0: i32): @@ -406,8 +428,9 @@ block0(v0: i32): return v2 } -; check: srak %r2, %r2, 17 -; nextln: br %r14 +; block0: +; srak %r2, %r2, 17 +; br %r14 function %sshr_i16_reg(i16, i16) -> i16 { block0(v0: i16, v1: i16): @@ -415,10 +438,11 @@ block0(v0: i16, v1: i16): return v2 } -; check: lhr %r2, %r2 -; nextln: nill %r3, 15 -; nextln: srak %r2, %r2, 0(%r3) -; nextln: br %r14 +; block0: +; lhr %r4, %r2 +; nill %r3, 15 +; srak %r2, %r4, 0(%r3) +; br %r14 function %sshr_i16_imm(i16) -> i16 { block0(v0: i16): @@ -427,9 +451,10 @@ block0(v0: i16): return v2 } -; check: lhr %r2, %r2 -; nextln: srak %r2, %r2, 10 -; nextln: br %r14 +; block0: +; lhr %r5, %r2 +; srak %r2, %r5, 10 +; br %r14 function %sshr_i8_reg(i8, i8) -> i8 { block0(v0: i8, v1: i8): @@ -437,10 +462,11 @@ block0(v0: i8, v1: i8): return v2 } -; check: lbr %r2, %r2 -; nextln: nill %r3, 7 -; nextln: srak %r2, %r2, 0(%r3) -; nextln: br %r14 +; block0: +; lbr %r4, %r2 +; nill %r3, 7 +; srak %r2, %r4, 0(%r3) +; br %r14 function %sshr_i8_imm(i8) -> i8 { block0(v0: i8): @@ -449,7 +475,8 @@ block0(v0: i8): return v2 } -; check: lbr %r2, %r2 -; nextln: srak %r2, %r2, 3 -; nextln: br %r14 +; block0: +; lbr %r5, %r2 +; srak %r2, %r5, 3 +; br %r14 diff --git a/cranelift/filetests/filetests/isa/s390x/stack-limit.clif b/cranelift/filetests/filetests/isa/s390x/stack-limit.clif index 0ef7320340..e2f802ab24 100644 --- a/cranelift/filetests/filetests/isa/s390x/stack-limit.clif +++ b/cranelift/filetests/filetests/isa/s390x/stack-limit.clif @@ -1,4 +1,4 @@ -test compile +test compile precise-output target s390x function %foo() { @@ -6,12 +6,16 @@ block0: return } +; block0: +; br %r14 + function %stack_limit_leaf_zero(i64 stack_limit) { block0(v0: i64): return } -; check: br %r14 +; block0: +; br %r14 function %stack_limit_gv_leaf_zero(i64 vmctx) { gv0 = vmctx @@ -22,8 +26,8 @@ block0(v0: i64): return } -; check: br %r14 - +; block0: +; br %r14 function %stack_limit_call_zero(i64 stack_limit) { fn0 = %foo() @@ -32,14 +36,15 @@ block0(v0: i64): return } -; check: clgrtle %r15, %r2 -; nextln: stmg %r14, %r15, 112(%r15) -; nextln: aghi %r15, -160 -; nextln: virtual_sp_offset_adjust 160 -; nextln: bras %r1, 12 ; data %foo + 0 ; lg %r2, 0(%r1) -; nextln: basr %r14, %r2 -; nextln: lmg %r14, %r15, 272(%r15) -; nextln: br %r14 +; clgrtle %r15, %r2 +; stmg %r14, %r15, 112(%r15) +; aghi %r15, -160 +; virtual_sp_offset_adjust 160 +; block0: +; bras %r1, 12 ; data %foo + 0 ; lg %r4, 0(%r1) +; basr %r14, %r4 +; lmg %r14, %r15, 272(%r15) +; br %r14 function %stack_limit_gv_call_zero(i64 vmctx) { gv0 = vmctx @@ -52,16 +57,17 @@ block0(v0: i64): return } -; check: lg %r1, 0(%r2) -; nextln: lg %r1, 4(%r1) -; nextln: clgrtle %r15, %r1 -; 
nextln: stmg %r14, %r15, 112(%r15) -; nextln: aghi %r15, -160 -; nextln: virtual_sp_offset_adjust 160 -; nextln: bras %r1, 12 ; data %foo + 0 ; lg %r2, 0(%r1) -; nextln: basr %r14, %r2 -; nextln: lmg %r14, %r15, 272(%r15) -; nextln: br %r14 +; lg %r1, 0(%r2) +; lg %r1, 4(%r1) +; clgrtle %r15, %r1 +; stmg %r14, %r15, 112(%r15) +; aghi %r15, -160 +; virtual_sp_offset_adjust 160 +; block0: +; bras %r1, 12 ; data %foo + 0 ; lg %r4, 0(%r1) +; basr %r14, %r4 +; lmg %r14, %r15, 272(%r15) +; br %r14 function %stack_limit(i64 stack_limit) { ss0 = explicit_slot 168 @@ -69,11 +75,12 @@ block0(v0: i64): return } -; check: la %r1, 168(%r2) -; nextln: clgrtle %r15, %r1 -; nextln: aghi %r15, -168 -; nextln: aghi %r15, 168 -; nextln: br %r14 +; la %r1, 168(%r2) +; clgrtle %r15, %r1 +; aghi %r15, -168 +; block0: +; aghi %r15, 168 +; br %r14 function %large_stack_limit(i64 stack_limit) { ss0 = explicit_slot 400000 @@ -81,12 +88,13 @@ block0(v0: i64): return } -; check: clgrtle %r15, %r2 -; nextln: lay %r1, 400000(%r2) -; nextln: clgrtle %r15, %r1 -; nextln: agfi %r15, -400000 -; nextln: agfi %r15, 400000 -; nextln: br %r14 +; clgrtle %r15, %r2 +; lay %r1, 400000(%r2) +; clgrtle %r15, %r1 +; agfi %r15, -400000 +; block0: +; agfi %r15, 400000 +; br %r14 function %huge_stack_limit(i64 stack_limit) { ss0 = explicit_slot 4000000 @@ -94,13 +102,14 @@ block0(v0: i64): return } -; check: clgrtle %r15, %r2 -; nextln: lgr %r1, %r2 -; nextln: algfi %r1, 4000000 -; nextln: clgrtle %r15, %r1 -; nextln: agfi %r15, -4000000 -; nextln: agfi %r15, 4000000 -; nextln: br %r14 +; clgrtle %r15, %r2 +; lgr %r1, %r2 +; algfi %r1, 4000000 +; clgrtle %r15, %r1 +; agfi %r15, -4000000 +; block0: +; agfi %r15, 4000000 +; br %r14 function %limit_preamble(i64 vmctx) { gv0 = vmctx @@ -112,13 +121,14 @@ block0(v0: i64): return } -; check: lg %r1, 0(%r2) -; nextln: lg %r1, 4(%r1) -; nextln: la %r1, 24(%r1) -; nextln: clgrtle %r15, %r1 -; nextln: aghi %r15, -24 -; nextln: aghi %r15, 24 -; nextln: br %r14 +; lg %r1, 0(%r2) +; lg %r1, 4(%r1) +; la %r1, 24(%r1) +; clgrtle %r15, %r1 +; aghi %r15, -24 +; block0: +; aghi %r15, 24 +; br %r14 function %limit_preamble_large(i64 vmctx) { gv0 = vmctx @@ -130,14 +140,15 @@ block0(v0: i64): return } -; check: lg %r1, 0(%r2) -; nextln: lg %r1, 4(%r1) -; nextln: clgrtle %r15, %r1 -; nextln: lay %r1, 400000(%r1) -; nextln: clgrtle %r15, %r1 -; nextln: agfi %r15, -400000 -; nextln: agfi %r15, 400000 -; nextln: br %r14 +; lg %r1, 0(%r2) +; lg %r1, 4(%r1) +; clgrtle %r15, %r1 +; lay %r1, 400000(%r1) +; clgrtle %r15, %r1 +; agfi %r15, -400000 +; block0: +; agfi %r15, 400000 +; br %r14 function %limit_preamble_huge(i64 vmctx) { gv0 = vmctx @@ -149,14 +160,15 @@ block0(v0: i64): return } -; check: lg %r1, 0(%r2) -; nextln: lg %r1, 4(%r1) -; nextln: clgrtle %r15, %r1 -; nextln: algfi %r1, 4000000 -; nextln: clgrtle %r15, %r1 -; nextln: agfi %r15, -4000000 -; nextln: agfi %r15, 4000000 -; nextln: br %r14 +; lg %r1, 0(%r2) +; lg %r1, 4(%r1) +; clgrtle %r15, %r1 +; algfi %r1, 4000000 +; clgrtle %r15, %r1 +; agfi %r15, -4000000 +; block0: +; agfi %r15, 4000000 +; br %r14 function %limit_preamble_huge_offset(i64 vmctx) { gv0 = vmctx @@ -167,9 +179,11 @@ block0(v0: i64): return } -; check: lgfi %r1, 1000000 ; lg %r1, 0(%r1,%r2) -; nextln: la %r1, 24(%r1) -; nextln: clgrtle %r15, %r1 -; nextln: aghi %r15, -24 -; nextln: aghi %r15, 24 -; nextln: br %r14 +; lgfi %r1, 1000000 ; lg %r1, 0(%r1,%r2) +; la %r1, 24(%r1) +; clgrtle %r15, %r1 +; aghi %r15, -24 +; block0: +; aghi %r15, 24 +; br %r14 + diff --git 
a/cranelift/filetests/filetests/isa/s390x/stack.clif b/cranelift/filetests/filetests/isa/s390x/stack.clif index 1ac80b9fd0..d75edd6f88 100644 --- a/cranelift/filetests/filetests/isa/s390x/stack.clif +++ b/cranelift/filetests/filetests/isa/s390x/stack.clif @@ -1,4 +1,4 @@ -test compile +test compile precise-output target s390x ; FIXME: Should allocate register save area. @@ -11,10 +11,11 @@ block0: return v0 } -; check: aghi %r15, -8 -; nextln: la %r2, 0(%r15) -; nextln: aghi %r15, 8 -; nextln: br %r14 +; aghi %r15, -8 +; block0: +; la %r2, 0(%r15) +; aghi %r15, 8 +; br %r14 function %stack_addr_big() -> i64 { ss0 = explicit_slot 100000 @@ -25,12 +26,11 @@ block0: return v0 } -; check: agfi %r15, -100008 -; nextln: la %r2, 0(%r15) -; nextln: agfi %r15, 100008 -; nextln: br %r14 - -; FIXME: don't use stack_addr legalization for stack_load and stack_store +; agfi %r15, -100008 +; block0: +; la %r2, 0(%r15) +; agfi %r15, 100008 +; br %r14 function %stack_load_small() -> i64 { ss0 = explicit_slot 8 @@ -40,11 +40,12 @@ block0: return v0 } -; check: aghi %r15, -8 -; nextln: la %r2, 0(%r15) -; nextln: lg %r2, 0(%r2) -; nextln: aghi %r15, 8 -; nextln: br %r14 +; aghi %r15, -8 +; block0: +; la %r4, 0(%r15) +; lg %r2, 0(%r4) +; aghi %r15, 8 +; br %r14 function %stack_load_big() -> i64 { ss0 = explicit_slot 100000 @@ -55,12 +56,12 @@ block0: return v0 } -; check: agfi %r15, -100008 -; nextln: la %r2, 0(%r15) -; nextln: lg %r2, 0(%r2) -; nextln: agfi %r15, 100008 -; nextln: br %r14 - +; agfi %r15, -100008 +; block0: +; la %r4, 0(%r15) +; lg %r2, 0(%r4) +; agfi %r15, 100008 +; br %r14 function %stack_store_small(i64) { ss0 = explicit_slot 8 @@ -70,11 +71,12 @@ block0(v0: i64): return } -; check: aghi %r15, -8 -; nextln: la %r3, 0(%r15) -; nextln: stg %r2, 0(%r3) -; nextln: aghi %r15, 8 -; nextln: br %r14 +; aghi %r15, -8 +; block0: +; la %r4, 0(%r15) +; stg %r2, 0(%r4) +; aghi %r15, 8 +; br %r14 function %stack_store_big(i64) { ss0 = explicit_slot 100000 @@ -85,9 +87,10 @@ block0(v0: i64): return } -; check: agfi %r15, -100008 -; nextln: la %r3, 0(%r15) -; nextln: stg %r2, 0(%r3) -; nextln: agfi %r15, 100008 -; nextln: br %r14 +; agfi %r15, -100008 +; block0: +; la %r4, 0(%r15) +; stg %r2, 0(%r4) +; agfi %r15, 100008 +; br %r14 diff --git a/cranelift/filetests/filetests/isa/s390x/store-little.clif b/cranelift/filetests/filetests/isa/s390x/store-little.clif index 65c9ffab93..79b172ff72 100644 --- a/cranelift/filetests/filetests/isa/s390x/store-little.clif +++ b/cranelift/filetests/filetests/isa/s390x/store-little.clif @@ -1,4 +1,4 @@ -test compile +test compile precise-output target s390x function %store_i64(i64, i64) { @@ -7,8 +7,9 @@ block0(v0: i64, v1: i64): return } -; check: strvg %r2, 0(%r3) -; nextln: br %r14 +; block0: +; strvg %r2, 0(%r3) +; br %r14 function %store_i64_sym(i64) { gv0 = symbol colocated %sym @@ -18,8 +19,9 @@ block0(v0: i64): return } -; check: larl %r1, %sym + 0 ; strvg %r2, 0(%r1) -; nextln: br %r14 +; block0: +; larl %r1, %sym + 0 ; strvg %r2, 0(%r1) +; br %r14 function %store_imm_i64(i64) { block0(v0: i64): @@ -28,9 +30,10 @@ block0(v0: i64): return } -; check: lghi %r3, 12345 -; nextln: strvg %r3, 0(%r2) -; nextln: br %r14 +; block0: +; lghi %r4, 12345 +; strvg %r4, 0(%r2) +; br %r14 function %istore8_i64(i64, i64) { block0(v0: i64, v1: i64): @@ -38,8 +41,9 @@ block0(v0: i64, v1: i64): return } -; check: stc %r2, 0(%r3) -; nextln: br %r14 +; block0: +; stc %r2, 0(%r3) +; br %r14 function %istore8_imm_i64(i64) { block0(v0: i64): @@ -48,8 +52,9 @@ block0(v0: i64): return } -; 
check: mvi 0(%r2), 123 -; nextln: br %r14 +; block0: +; mvi 0(%r2), 123 +; br %r14 function %istore16_i64(i64, i64) { block0(v0: i64, v1: i64): @@ -57,8 +62,9 @@ block0(v0: i64, v1: i64): return } -; check: strvh %r2, 0(%r3) -; nextln: br %r14 +; block0: +; strvh %r2, 0(%r3) +; br %r14 function %istore16_i64_sym(i64) { gv0 = symbol colocated %sym @@ -68,8 +74,9 @@ block0(v0: i64): return } -; check: larl %r1, %sym + 0 ; strvh %r2, 0(%r1) -; nextln: br %r14 +; block0: +; larl %r1, %sym + 0 ; strvh %r2, 0(%r1) +; br %r14 function %istore16_imm_i64(i64) { block0(v0: i64): @@ -78,8 +85,9 @@ block0(v0: i64): return } -; check: mvhhi 0(%r2), 14640 -; nextln: br %r14 +; block0: +; mvhhi 0(%r2), 14640 +; br %r14 function %istore32_i64(i64, i64) { block0(v0: i64, v1: i64): @@ -87,8 +95,9 @@ block0(v0: i64, v1: i64): return } -; check: strv %r2, 0(%r3) -; nextln: br %r14 +; block0: +; strv %r2, 0(%r3) +; br %r14 function %istore32_i64_sym(i64) { gv0 = symbol colocated %sym @@ -98,8 +107,9 @@ block0(v0: i64): return } -; check: larl %r1, %sym + 0 ; strv %r2, 0(%r1) -; nextln: br %r14 +; block0: +; larl %r1, %sym + 0 ; strv %r2, 0(%r1) +; br %r14 function %istore32_imm_i64(i64) { block0(v0: i64): @@ -108,9 +118,10 @@ block0(v0: i64): return } -; check: lghi %r3, 12345 -; nextln: strv %r3, 0(%r2) -; nextln: br %r14 +; block0: +; lghi %r4, 12345 +; strv %r4, 0(%r2) +; br %r14 function %store_i32(i32, i64) { block0(v0: i32, v1: i64): @@ -118,8 +129,9 @@ block0(v0: i32, v1: i64): return } -; check: strv %r2, 0(%r3) -; nextln: br %r14 +; block0: +; strv %r2, 0(%r3) +; br %r14 function %store_i32_sym(i32) { gv0 = symbol colocated %sym @@ -129,8 +141,9 @@ block0(v0: i32): return } -; check: larl %r1, %sym + 0 ; strv %r2, 0(%r1) -; nextln: br %r14 +; block0: +; larl %r1, %sym + 0 ; strv %r2, 0(%r1) +; br %r14 function %store_imm_i32(i64) { block0(v0: i64): @@ -139,9 +152,10 @@ block0(v0: i64): return } -; check: lhi %r3, 12345 -; nextln: strv %r3, 0(%r2) -; nextln: br %r14 +; block0: +; lhi %r4, 12345 +; strv %r4, 0(%r2) +; br %r14 function %istore8_i32(i32, i64) { block0(v0: i32, v1: i64): @@ -149,8 +163,9 @@ block0(v0: i32, v1: i64): return } -; check: stc %r2, 0(%r3) -; nextln: br %r14 +; block0: +; stc %r2, 0(%r3) +; br %r14 function %istore8_imm_i32(i64) { block0(v0: i64): @@ -159,8 +174,9 @@ block0(v0: i64): return } -; check: mvi 0(%r2), 123 -; nextln: br %r14 +; block0: +; mvi 0(%r2), 123 +; br %r14 function %istore16_i32(i32, i64) { block0(v0: i32, v1: i64): @@ -168,8 +184,9 @@ block0(v0: i32, v1: i64): return } -; check: strvh %r2, 0(%r3) -; nextln: br %r14 +; block0: +; strvh %r2, 0(%r3) +; br %r14 function %istore16_i32_sym(i32) { gv0 = symbol colocated %sym @@ -179,8 +196,9 @@ block0(v0: i32): return } -; check: larl %r1, %sym + 0 ; strvh %r2, 0(%r1) -; nextln: br %r14 +; block0: +; larl %r1, %sym + 0 ; strvh %r2, 0(%r1) +; br %r14 function %istore16_imm_i32(i64) { block0(v0: i64): @@ -189,8 +207,9 @@ block0(v0: i64): return } -; check: mvhhi 0(%r2), 14640 -; nextln: br %r14 +; block0: +; mvhhi 0(%r2), 14640 +; br %r14 function %store_i16(i16, i64) { block0(v0: i16, v1: i64): @@ -198,8 +217,9 @@ block0(v0: i16, v1: i64): return } -; check: strvh %r2, 0(%r3) -; nextln: br %r14 +; block0: +; strvh %r2, 0(%r3) +; br %r14 function %store_i16_sym(i16) { gv0 = symbol colocated %sym @@ -209,8 +229,9 @@ block0(v0: i16): return } -; check: larl %r1, %sym + 0 ; strvh %r2, 0(%r1) -; nextln: br %r14 +; block0: +; larl %r1, %sym + 0 ; strvh %r2, 0(%r1) +; br %r14 function %store_imm_i16(i64) { block0(v0: 
i64): @@ -219,8 +240,9 @@ block0(v0: i64): return } -; check: mvhhi 0(%r2), 14640 -; nextln: br %r14 +; block0: +; mvhhi 0(%r2), 14640 +; br %r14 function %istore8_i16(i16, i64) { block0(v0: i16, v1: i64): @@ -228,8 +250,9 @@ block0(v0: i16, v1: i64): return } -; check: stc %r2, 0(%r3) -; nextln: br %r14 +; block0: +; stc %r2, 0(%r3) +; br %r14 function %istore8_imm_i16(i64) { block0(v0: i64): @@ -238,8 +261,9 @@ block0(v0: i64): return } -; check: mvi 0(%r2), 123 -; nextln: br %r14 +; block0: +; mvi 0(%r2), 123 +; br %r14 function %store_i8(i8, i64) { block0(v0: i8, v1: i64): @@ -247,8 +271,9 @@ block0(v0: i8, v1: i64): return } -; check: stc %r2, 0(%r3) -; nextln: br %r14 +; block0: +; stc %r2, 0(%r3) +; br %r14 function %store_i8_off(i8, i64) { block0(v0: i8, v1: i64): @@ -256,8 +281,9 @@ block0(v0: i8, v1: i64): return } -; check: stcy %r2, 4096(%r3) -; nextln: br %r14 +; block0: +; stcy %r2, 4096(%r3) +; br %r14 function %store_imm_i8(i64) { block0(v0: i64): @@ -266,8 +292,9 @@ block0(v0: i64): return } -; check: mvi 0(%r2), 123 -; nextln: br %r14 +; block0: +; mvi 0(%r2), 123 +; br %r14 function %store_imm_i8_off(i64) { block0(v0: i64): @@ -276,6 +303,7 @@ block0(v0: i64): return } -; check: mviy 4096(%r2), 123 -; nextln: br %r14 +; block0: +; mviy 4096(%r2), 123 +; br %r14 diff --git a/cranelift/filetests/filetests/isa/s390x/store.clif b/cranelift/filetests/filetests/isa/s390x/store.clif index c7f896ae29..b0cea254e4 100644 --- a/cranelift/filetests/filetests/isa/s390x/store.clif +++ b/cranelift/filetests/filetests/isa/s390x/store.clif @@ -1,4 +1,4 @@ -test compile +test compile precise-output target s390x function %store_i64(i64, i64) { @@ -7,8 +7,9 @@ block0(v0: i64, v1: i64): return } -; check: stg %r2, 0(%r3) -; nextln: br %r14 +; block0: +; stg %r2, 0(%r3) +; br %r14 function %store_i64_sym(i64) { gv0 = symbol colocated %sym @@ -18,8 +19,9 @@ block0(v0: i64): return } -; check: stgrl %r2, %sym + 0 -; nextln: br %r14 +; block0: +; stgrl %r2, %sym + 0 +; br %r14 function %store_imm_i64(i64) { block0(v0: i64): @@ -28,8 +30,9 @@ block0(v0: i64): return } -; check: mvghi 0(%r2), 12345 -; nextln: br %r14 +; block0: +; mvghi 0(%r2), 12345 +; br %r14 function %istore8_i64(i64, i64) { block0(v0: i64, v1: i64): @@ -37,8 +40,9 @@ block0(v0: i64, v1: i64): return } -; check: stc %r2, 0(%r3) -; nextln: br %r14 +; block0: +; stc %r2, 0(%r3) +; br %r14 function %istore8_imm_i64(i64) { block0(v0: i64): @@ -47,8 +51,9 @@ block0(v0: i64): return } -; check: mvi 0(%r2), 123 -; nextln: br %r14 +; block0: +; mvi 0(%r2), 123 +; br %r14 function %istore16_i64(i64, i64) { block0(v0: i64, v1: i64): @@ -56,8 +61,9 @@ block0(v0: i64, v1: i64): return } -; check: sth %r2, 0(%r3) -; nextln: br %r14 +; block0: +; sth %r2, 0(%r3) +; br %r14 function %istore16_i64_sym(i64) { gv0 = symbol colocated %sym @@ -67,8 +73,9 @@ block0(v0: i64): return } -; check: sthrl %r2, %sym + 0 -; nextln: br %r14 +; block0: +; sthrl %r2, %sym + 0 +; br %r14 function %istore16_imm_i64(i64) { block0(v0: i64): @@ -77,8 +84,9 @@ block0(v0: i64): return } -; check: mvhhi 0(%r2), 12345 -; nextln: br %r14 +; block0: +; mvhhi 0(%r2), 12345 +; br %r14 function %istore32_i64(i64, i64) { block0(v0: i64, v1: i64): @@ -86,8 +94,9 @@ block0(v0: i64, v1: i64): return } -; check: st %r2, 0(%r3) -; nextln: br %r14 +; block0: +; st %r2, 0(%r3) +; br %r14 function %istore32_i64_sym(i64) { gv0 = symbol colocated %sym @@ -97,8 +106,9 @@ block0(v0: i64): return } -; check: strl %r2, %sym + 0 -; nextln: br %r14 +; block0: +; strl %r2, %sym + 0 +; br 
%r14 function %istore32_imm_i64(i64) { block0(v0: i64): @@ -107,8 +117,9 @@ block0(v0: i64): return } -; check: mvhi 0(%r2), 12345 -; nextln: br %r14 +; block0: +; mvhi 0(%r2), 12345 +; br %r14 function %store_i32(i32, i64) { block0(v0: i32, v1: i64): @@ -116,8 +127,9 @@ block0(v0: i32, v1: i64): return } -; check: st %r2, 0(%r3) -; nextln: br %r14 +; block0: +; st %r2, 0(%r3) +; br %r14 function %store_i32_sym(i32) { gv0 = symbol colocated %sym @@ -127,8 +139,9 @@ block0(v0: i32): return } -; check: strl %r2, %sym + 0 -; nextln: br %r14 +; block0: +; strl %r2, %sym + 0 +; br %r14 function %store_i32_off(i32, i64) { block0(v0: i32, v1: i64): @@ -136,8 +149,9 @@ block0(v0: i32, v1: i64): return } -; check: sty %r2, 4096(%r3) -; nextln: br %r14 +; block0: +; sty %r2, 4096(%r3) +; br %r14 function %store_imm_i32(i64) { block0(v0: i64): @@ -146,8 +160,9 @@ block0(v0: i64): return } -; check: mvhi 0(%r2), 12345 -; nextln: br %r14 +; block0: +; mvhi 0(%r2), 12345 +; br %r14 function %istore8_i32(i32, i64) { block0(v0: i32, v1: i64): @@ -155,8 +170,9 @@ block0(v0: i32, v1: i64): return } -; check: stc %r2, 0(%r3) -; nextln: br %r14 +; block0: +; stc %r2, 0(%r3) +; br %r14 function %istore8_imm_i32(i64) { block0(v0: i64): @@ -165,8 +181,9 @@ block0(v0: i64): return } -; check: mvi 0(%r2), 123 -; nextln: br %r14 +; block0: +; mvi 0(%r2), 123 +; br %r14 function %istore16_i32(i32, i64) { block0(v0: i32, v1: i64): @@ -174,8 +191,9 @@ block0(v0: i32, v1: i64): return } -; check: sth %r2, 0(%r3) -; nextln: br %r14 +; block0: +; sth %r2, 0(%r3) +; br %r14 function %istore16_i32_sym(i32) { gv0 = symbol colocated %sym @@ -185,8 +203,9 @@ block0(v0: i32): return } -; check: sthrl %r2, %sym + 0 -; nextln: br %r14 +; block0: +; sthrl %r2, %sym + 0 +; br %r14 function %istore16_imm_i32(i64) { block0(v0: i64): @@ -195,8 +214,9 @@ block0(v0: i64): return } -; check: mvhhi 0(%r2), 12345 -; nextln: br %r14 +; block0: +; mvhhi 0(%r2), 12345 +; br %r14 function %store_i16(i16, i64) { block0(v0: i16, v1: i64): @@ -204,8 +224,9 @@ block0(v0: i16, v1: i64): return } -; check: sth %r2, 0(%r3) -; nextln: br %r14 +; block0: +; sth %r2, 0(%r3) +; br %r14 function %store_i16_sym(i16) { gv0 = symbol colocated %sym @@ -215,8 +236,9 @@ block0(v0: i16): return } -; check: sthrl %r2, %sym + 0 -; nextln: br %r14 +; block0: +; sthrl %r2, %sym + 0 +; br %r14 function %store_i16_off(i16, i64) { block0(v0: i16, v1: i64): @@ -224,8 +246,9 @@ block0(v0: i16, v1: i64): return } -; check: sthy %r2, 4096(%r3) -; nextln: br %r14 +; block0: +; sthy %r2, 4096(%r3) +; br %r14 function %store_imm_i16(i64) { block0(v0: i64): @@ -234,8 +257,9 @@ block0(v0: i64): return } -; check: mvhhi 0(%r2), 12345 -; nextln: br %r14 +; block0: +; mvhhi 0(%r2), 12345 +; br %r14 function %istore8_i16(i16, i64) { block0(v0: i16, v1: i64): @@ -243,8 +267,9 @@ block0(v0: i16, v1: i64): return } -; check: stc %r2, 0(%r3) -; nextln: br %r14 +; block0: +; stc %r2, 0(%r3) +; br %r14 function %istore8_imm_i16(i64) { block0(v0: i64): @@ -253,8 +278,9 @@ block0(v0: i64): return } -; check: mvi 0(%r2), 123 -; nextln: br %r14 +; block0: +; mvi 0(%r2), 123 +; br %r14 function %store_i8(i8, i64) { block0(v0: i8, v1: i64): @@ -262,8 +288,9 @@ block0(v0: i8, v1: i64): return } -; check: stc %r2, 0(%r3) -; nextln: br %r14 +; block0: +; stc %r2, 0(%r3) +; br %r14 function %store_i8_off(i8, i64) { block0(v0: i8, v1: i64): @@ -271,8 +298,9 @@ block0(v0: i8, v1: i64): return } -; check: stcy %r2, 4096(%r3) -; nextln: br %r14 +; block0: +; stcy %r2, 4096(%r3) +; br %r14 function 
%store_imm_i8(i64) { block0(v0: i64): @@ -281,8 +309,9 @@ block0(v0: i64): return } -; check: mvi 0(%r2), 123 -; nextln: br %r14 +; block0: +; mvi 0(%r2), 123 +; br %r14 function %store_imm_i8_off(i64) { block0(v0: i64): @@ -291,6 +320,7 @@ block0(v0: i64): return } -; check: mviy 4096(%r2), 123 -; nextln: br %r14 +; block0: +; mviy 4096(%r2), 123 +; br %r14 diff --git a/cranelift/filetests/filetests/isa/s390x/symbols.clif b/cranelift/filetests/filetests/isa/s390x/symbols.clif index 98d0cc8e30..c995ea94eb 100644 --- a/cranelift/filetests/filetests/isa/s390x/symbols.clif +++ b/cranelift/filetests/filetests/isa/s390x/symbols.clif @@ -1,4 +1,4 @@ -test compile +test compile precise-output target s390x ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; @@ -13,8 +13,9 @@ block0: return v0 } -; check: bras %r1, 12 ; data %my_global + 0 ; lg %r2, 0(%r1) -; nextln: br %r14 +; block0: +; bras %r1, 12 ; data %my_global + 0 ; lg %r2, 0(%r1) +; br %r14 function %symbol_value_colocated() -> i64 { gv0 = symbol colocated %my_global_colo @@ -24,12 +25,9 @@ block0: return v0 } -; check: larl %r2, %my_global_colo + 0 -; nextln: br %r14 - -;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; -;; FUNC_ADDR -;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +; block0: +; larl %r2, %my_global_colo + 0 +; br %r14 function %func_addr() -> i64 { fn0 = %my_func(i64) -> i64 @@ -39,8 +37,9 @@ block0: return v0 } -; check: bras %r1, 12 ; data %my_func + 0 ; lg %r2, 0(%r1) -; nextln: br %r14 +; block0: +; bras %r1, 12 ; data %my_func + 0 ; lg %r2, 0(%r1) +; br %r14 function %func_addr_colocated() -> i64 { fn0 = colocated %my_func_colo(i64) -> i64 @@ -50,5 +49,7 @@ block0: return v0 } -; check: larl %r2, %my_func_colo + 0 -; nextln: br %r14 +; block0: +; larl %r2, %my_func_colo + 0 +; br %r14 + diff --git a/cranelift/filetests/filetests/isa/s390x/traps.clif b/cranelift/filetests/filetests/isa/s390x/traps.clif index a6b70cecea..d6a191b3ea 100644 --- a/cranelift/filetests/filetests/isa/s390x/traps.clif +++ b/cranelift/filetests/filetests/isa/s390x/traps.clif @@ -1,4 +1,4 @@ -test compile +test compile precise-output target s390x ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; @@ -10,18 +10,16 @@ block0: trap user0 } -; check: trap +; block0: +; trap function %resumable_trap() { block0: trap user0 } -; check: trap - -;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; -;; TRAPZ -;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +; block0: +; trap function %trapz(i64) { block0(v0: i64): @@ -31,18 +29,13 @@ block0(v0: i64): return } -; FIXME: Does not use TrapIf internally as trapz is expanded. -; check: Block 0 -; check: clgfi %r2, 42 -; nextln: jge label1 ; jg label2 -; check: Block 1: -; check: br %r14 -; check: Block 2: -; check: trap - -;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; -;; TRAPNZ/RESUMABLE_TRAPNZ -;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +; block0: +; clgfi %r2, 42 +; jge label1 ; jg label2 +; block1: +; br %r14 +; block2: +; trap function %trapnz(i64) { block0(v0: i64): @@ -52,14 +45,13 @@ block0(v0: i64): return } -; FIXME: Does not use TrapIf internally as trapnz is expanded. -; check: Block 0 -; check: clgfi %r2, 42 -; nextln: jgne label1 ; jg label2 -; check: Block 1: -; check: br %r14 -; check: Block 2: -; check: trap +; block0: +; clgfi %r2, 42 +; jgne label1 ; jg label2 +; block1: +; br %r14 +; block2: +; trap function %resumable_trapnz(i64) { block0(v0: i64): @@ -69,18 +61,13 @@ block0(v0: i64): return } -; FIXME: Does not use TrapIf internally as resumable_trapnz is expanded. 
-; check: Block 0 -; check: clgfi %r2, 42 -; nextln: jgne label1 ; jg label2 -; check: Block 1: -; check: br %r14 -; check: Block 2: -; check: trap - -;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; -;; DEBUGTRAP -;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +; block0: +; clgfi %r2, 42 +; jgne label1 ; jg label2 +; block1: +; br %r14 +; block2: +; trap function %h() { block0: @@ -88,4 +75,7 @@ block0: return } -; check: debugtrap +; block0: +; debugtrap +; br %r14 + diff --git a/cranelift/filetests/filetests/isa/x64/amode-opt.clif b/cranelift/filetests/filetests/isa/x64/amode-opt.clif index a8aa7966ac..eca1ac1499 100644 --- a/cranelift/filetests/filetests/isa/x64/amode-opt.clif +++ b/cranelift/filetests/filetests/isa/x64/amode-opt.clif @@ -8,19 +8,13 @@ block0(v0: i64, v1: i64): return v3 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 7) -; Inst 0: pushq %rbp -; Inst 1: movq %rsp, %rbp -; Inst 2: movq 0(%rdi,%rsi,1), %rsi -; Inst 3: movq %rsi, %rax -; Inst 4: movq %rbp, %rsp -; Inst 5: popq %rbp -; Inst 6: ret -; }} +; pushq %rbp +; movq %rsp, %rbp +; block0: +; movq 0(%rdi,%rsi,1), %rax +; movq %rbp, %rsp +; popq %rbp +; ret function %amode_add_imm(i64) -> i64 { block0(v0: i64): @@ -30,19 +24,13 @@ block0(v0: i64): return v3 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 7) -; Inst 0: pushq %rbp -; Inst 1: movq %rsp, %rbp -; Inst 2: movq 42(%rdi), %rsi -; Inst 3: movq %rsi, %rax -; Inst 4: movq %rbp, %rsp -; Inst 5: popq %rbp -; Inst 6: ret -; }} +; pushq %rbp +; movq %rsp, %rbp +; block0: +; movq 42(%rdi), %rax +; movq %rbp, %rsp +; popq %rbp +; ret function %amode_add_imm_order(i64) -> i64 { block0(v0: i64): @@ -52,19 +40,13 @@ block0(v0: i64): return v3 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 7) -; Inst 0: pushq %rbp -; Inst 1: movq %rsp, %rbp -; Inst 2: movq 42(%rdi), %rsi -; Inst 3: movq %rsi, %rax -; Inst 4: movq %rbp, %rsp -; Inst 5: popq %rbp -; Inst 6: ret -; }} +; pushq %rbp +; movq %rsp, %rbp +; block0: +; movq 42(%rdi), %rax +; movq %rbp, %rsp +; popq %rbp +; ret function %amode_add_uext_imm(i64) -> i64 { block0(v0: i64): @@ -75,17 +57,11 @@ block0(v0: i64): return v4 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 7) -; Inst 0: pushq %rbp -; Inst 1: movq %rsp, %rbp -; Inst 2: movq 42(%rdi), %rsi -; Inst 3: movq %rsi, %rax -; Inst 4: movq %rbp, %rsp -; Inst 5: popq %rbp -; Inst 6: ret -; }} +; pushq %rbp +; movq %rsp, %rbp +; block0: +; movq 42(%rdi), %rax +; movq %rbp, %rsp +; popq %rbp +; ret diff --git a/cranelift/filetests/filetests/isa/x64/b1.clif b/cranelift/filetests/filetests/isa/x64/b1.clif index 9d69d70e1c..eb971b36fa 100644 --- a/cranelift/filetests/filetests/isa/x64/b1.clif +++ b/cranelift/filetests/filetests/isa/x64/b1.clif @@ -7,21 +7,15 @@ block0(v0: b1, v1: i32, v2: i32): return v3 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 
9) -; Inst 0: pushq %rbp -; Inst 1: movq %rsp, %rbp -; Inst 2: testb $1, %dil -; Inst 3: movl %edx, %edi -; Inst 4: cmovnzl %esi, %edi -; Inst 5: movq %rdi, %rax -; Inst 6: movq %rbp, %rsp -; Inst 7: popq %rbp -; Inst 8: ret -; }} +; pushq %rbp +; movq %rsp, %rbp +; block0: +; testb $1, %dil +; cmovnzl %esi, %edx, %edx +; movq %rdx, %rax +; movq %rbp, %rsp +; popq %rbp +; ret function %f1(b1) -> i32 { block0(v0: b1): @@ -35,32 +29,21 @@ block2: return v2 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (successor: Block 1) -; (successor: Block 2) -; (instruction range: 0 .. 4) -; Inst 0: pushq %rbp -; Inst 1: movq %rsp, %rbp -; Inst 2: testb $1, %dil -; Inst 3: jnz label1; j label2 -; Block 1: -; (original IR block: block1) -; (instruction range: 4 .. 8) -; Inst 4: movl $1, %eax -; Inst 5: movq %rbp, %rsp -; Inst 6: popq %rbp -; Inst 7: ret -; Block 2: -; (original IR block: block2) -; (instruction range: 8 .. 12) -; Inst 8: movl $2, %eax -; Inst 9: movq %rbp, %rsp -; Inst 10: popq %rbp -; Inst 11: ret -; }} +; pushq %rbp +; movq %rsp, %rbp +; block0: +; testb $1, %dil +; jnz label1; j label2 +; block1: +; movl $1, %eax +; movq %rbp, %rsp +; popq %rbp +; ret +; block2: +; movl $2, %eax +; movq %rbp, %rsp +; popq %rbp +; ret function %f2(b1) -> i32 { block0(v0: b1): @@ -74,30 +57,19 @@ block2: return v2 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (successor: Block 1) -; (successor: Block 2) -; (instruction range: 0 .. 4) -; Inst 0: pushq %rbp -; Inst 1: movq %rsp, %rbp -; Inst 2: testb $1, %dil -; Inst 3: jz label1; j label2 -; Block 1: -; (original IR block: block1) -; (instruction range: 4 .. 8) -; Inst 4: movl $1, %eax -; Inst 5: movq %rbp, %rsp -; Inst 6: popq %rbp -; Inst 7: ret -; Block 2: -; (original IR block: block2) -; (instruction range: 8 .. 12) -; Inst 8: movl $2, %eax -; Inst 9: movq %rbp, %rsp -; Inst 10: popq %rbp -; Inst 11: ret -; }} +; pushq %rbp +; movq %rsp, %rbp +; block0: +; testb $1, %dil +; jz label1; j label2 +; block1: +; movl $1, %eax +; movq %rbp, %rsp +; popq %rbp +; ret +; block2: +; movl $2, %eax +; movq %rbp, %rsp +; popq %rbp +; ret diff --git a/cranelift/filetests/filetests/isa/x64/basic.clif b/cranelift/filetests/filetests/isa/x64/basic.clif index 537f0ba993..ba779d4cf6 100644 --- a/cranelift/filetests/filetests/isa/x64/basic.clif +++ b/cranelift/filetests/filetests/isa/x64/basic.clif @@ -7,17 +7,12 @@ block0(v0: i32, v1: i32): return v2 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 7) -; Inst 0: pushq %rbp -; Inst 1: movq %rsp, %rbp -; Inst 2: addl %esi, %edi -; Inst 3: movq %rdi, %rax -; Inst 4: movq %rbp, %rsp -; Inst 5: popq %rbp -; Inst 6: ret -; }} +; pushq %rbp +; movq %rsp, %rbp +; block0: +; addl %edi, %esi, %edi +; movq %rdi, %rax +; movq %rbp, %rsp +; popq %rbp +; ret diff --git a/cranelift/filetests/filetests/isa/x64/bextend.clif b/cranelift/filetests/filetests/isa/x64/bextend.clif index fca9d8efde..8c79762d56 100644 --- a/cranelift/filetests/filetests/isa/x64/bextend.clif +++ b/cranelift/filetests/filetests/isa/x64/bextend.clif @@ -7,17 +7,11 @@ block0(v0: b8): return v1 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 
7) -; Inst 0: pushq %rbp -; Inst 1: movq %rsp, %rbp -; Inst 2: movsbq %dil, %rsi -; Inst 3: movq %rsi, %rax -; Inst 4: movq %rbp, %rsp -; Inst 5: popq %rbp -; Inst 6: ret -; }} +; pushq %rbp +; movq %rsp, %rbp +; block0: +; movsbq %dil, %rax +; movq %rbp, %rsp +; popq %rbp +; ret diff --git a/cranelift/filetests/filetests/isa/x64/branches.clif b/cranelift/filetests/filetests/isa/x64/branches.clif index d0d7462366..0a6dbfacc8 100644 --- a/cranelift/filetests/filetests/isa/x64/branches.clif +++ b/cranelift/filetests/filetests/isa/x64/branches.clif @@ -16,32 +16,21 @@ block2: return v4 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (successor: Block 1) -; (successor: Block 2) -; (instruction range: 0 .. 4) -; Inst 0: pushq %rbp -; Inst 1: movq %rsp, %rbp -; Inst 2: cmpl %esi, %edi -; Inst 3: jz label1; j label2 -; Block 1: -; (original IR block: block1) -; (instruction range: 4 .. 8) -; Inst 4: movl $1, %eax -; Inst 5: movq %rbp, %rsp -; Inst 6: popq %rbp -; Inst 7: ret -; Block 2: -; (original IR block: block2) -; (instruction range: 8 .. 12) -; Inst 8: movl $2, %eax -; Inst 9: movq %rbp, %rsp -; Inst 10: popq %rbp -; Inst 11: ret -; }} +; pushq %rbp +; movq %rsp, %rbp +; block0: +; cmpl %esi, %edi +; jz label1; j label2 +; block1: +; movl $1, %eax +; movq %rbp, %rsp +; popq %rbp +; ret +; block2: +; movl $2, %eax +; movq %rbp, %rsp +; popq %rbp +; ret function %f1(i32, i32) -> i32 { block0(v0: i32, v1: i32): @@ -58,32 +47,21 @@ block2: return v4 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (successor: Block 1) -; (successor: Block 2) -; (instruction range: 0 .. 4) -; Inst 0: pushq %rbp -; Inst 1: movq %rsp, %rbp -; Inst 2: cmpl %esi, %edi -; Inst 3: jnz label1; j label2 -; Block 1: -; (original IR block: block1) -; (instruction range: 4 .. 8) -; Inst 4: movl $1, %eax -; Inst 5: movq %rbp, %rsp -; Inst 6: popq %rbp -; Inst 7: ret -; Block 2: -; (original IR block: block2) -; (instruction range: 8 .. 12) -; Inst 8: movl $2, %eax -; Inst 9: movq %rbp, %rsp -; Inst 10: popq %rbp -; Inst 11: ret -; }} +; pushq %rbp +; movq %rsp, %rbp +; block0: +; cmpl %esi, %edi +; jnz label1; j label2 +; block1: +; movl $1, %eax +; movq %rbp, %rsp +; popq %rbp +; ret +; block2: +; movl $2, %eax +; movq %rbp, %rsp +; popq %rbp +; ret function %f2(i32, i32) -> i32 { block0(v0: i32, v1: i32): @@ -100,32 +78,21 @@ block2: return v4 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (successor: Block 1) -; (successor: Block 2) -; (instruction range: 0 .. 4) -; Inst 0: pushq %rbp -; Inst 1: movq %rsp, %rbp -; Inst 2: cmpl %esi, %edi -; Inst 3: jz label1; j label2 -; Block 1: -; (original IR block: block1) -; (instruction range: 4 .. 8) -; Inst 4: movl $1, %eax -; Inst 5: movq %rbp, %rsp -; Inst 6: popq %rbp -; Inst 7: ret -; Block 2: -; (original IR block: block2) -; (instruction range: 8 .. 12) -; Inst 8: movl $2, %eax -; Inst 9: movq %rbp, %rsp -; Inst 10: popq %rbp -; Inst 11: ret -; }} +; pushq %rbp +; movq %rsp, %rbp +; block0: +; cmpl %esi, %edi +; jz label1; j label2 +; block1: +; movl $1, %eax +; movq %rbp, %rsp +; popq %rbp +; ret +; block2: +; movl $2, %eax +; movq %rbp, %rsp +; popq %rbp +; ret function %f3(f32, f32) -> i32 { block0(v0: f32, v1: f32): @@ -142,31 +109,20 @@ block2: return v4 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (successor: Block 2) -; (successor: Block 1) -; (instruction range: 0 .. 
5) -; Inst 0: pushq %rbp -; Inst 1: movq %rsp, %rbp -; Inst 2: ucomiss %xmm1, %xmm0 -; Inst 3: jp label2 -; Inst 4: jnz label2; j label1 -; Block 1: -; (original IR block: block1) -; (instruction range: 5 .. 9) -; Inst 5: movl $1, %eax -; Inst 6: movq %rbp, %rsp -; Inst 7: popq %rbp -; Inst 8: ret -; Block 2: -; (original IR block: block2) -; (instruction range: 9 .. 13) -; Inst 9: movl $2, %eax -; Inst 10: movq %rbp, %rsp -; Inst 11: popq %rbp -; Inst 12: ret -; }} +; pushq %rbp +; movq %rsp, %rbp +; block0: +; ucomiss %xmm1, %xmm0 +; jp label2 +; jnz label2; j label1 +; block1: +; movl $1, %eax +; movq %rbp, %rsp +; popq %rbp +; ret +; block2: +; movl $2, %eax +; movq %rbp, %rsp +; popq %rbp +; ret diff --git a/cranelift/filetests/filetests/isa/x64/call-conv.clif b/cranelift/filetests/filetests/isa/x64/call-conv.clif index 56f16f9888..cfb2bd3009 100644 --- a/cranelift/filetests/filetests/isa/x64/call-conv.clif +++ b/cranelift/filetests/filetests/isa/x64/call-conv.clif @@ -9,23 +9,19 @@ block0(v0: i32): return } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 11) -; Inst 0: pushq %rbp -; Inst 1: movq %rsp, %rbp -; Inst 2: subq $32, %rsp -; Inst 3: virtual_sp_offset_adjust 32 -; Inst 4: movq %rdi, %rcx -; Inst 5: call *%rdi -; Inst 6: addq $32, %rsp -; Inst 7: virtual_sp_offset_adjust -32 -; Inst 8: movq %rbp, %rsp -; Inst 9: popq %rbp -; Inst 10: ret -; }} +; pushq %rbp +; movq %rsp, %rbp +; block0: +; subq %rsp, $32, %rsp +; virtual_sp_offset_adjust 32 +; movq %rdi, %rcx +; movq %rcx, %rdi +; call *%rdi +; addq %rsp, $32, %rsp +; virtual_sp_offset_adjust -32 +; movq %rbp, %rsp +; popq %rbp +; ret function %two_args(i32, f32) system_v { ;; system_v has params in %rdi, %xmm0, fastcall in %rcx, %xmm1 @@ -37,29 +33,26 @@ block0(v0: i32, v1: f32): return } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 17) -; Inst 0: pushq %rbp -; Inst 1: movq %rsp, %rbp -; Inst 2: movq %rdi, %rsi -; Inst 3: movaps %xmm0, %xmm6 -; Inst 4: subq $32, %rsp -; Inst 5: virtual_sp_offset_adjust 32 -; Inst 6: movq %rsi, %rcx -; Inst 7: movaps %xmm6, %xmm1 -; Inst 8: call *%rsi -; Inst 9: addq $32, %rsp -; Inst 10: virtual_sp_offset_adjust -32 -; Inst 11: movq %rsi, %rdi -; Inst 12: movaps %xmm6, %xmm0 -; Inst 13: call *%rsi -; Inst 14: movq %rbp, %rsp -; Inst 15: popq %rbp -; Inst 16: ret -; }} +; pushq %rbp +; movq %rsp, %rbp +; block0: +; movq %rdi, %rax +; movdqa %xmm0, %xmm6 +; subq %rsp, $32, %rsp +; virtual_sp_offset_adjust 32 +; movq %rax, %rcx +; movdqa %xmm6, %xmm1 +; movq %rax, %rdi +; movdqa %xmm1, %xmm6 +; call *%rdi +; addq %rsp, $32, %rsp +; virtual_sp_offset_adjust -32 +; movq %rdi, %rax +; movdqa %xmm6, %xmm0 +; call *%rax +; movq %rbp, %rsp +; popq %rbp +; ret function %fastcall_to_systemv(i32) windows_fastcall { ;; fastcall preserves xmm6+, rbx, rbp, rdi, rsi, r12-r15 @@ -70,44 +63,39 @@ block0(v0: i32): return } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 
32) -; Inst 0: pushq %rbp -; Inst 1: movq %rsp, %rbp -; Inst 2: subq $176, %rsp -; Inst 3: movdqu %xmm6, 0(%rsp) -; Inst 4: movdqu %xmm7, 16(%rsp) -; Inst 5: movdqu %xmm8, 32(%rsp) -; Inst 6: movdqu %xmm9, 48(%rsp) -; Inst 7: movdqu %xmm10, 64(%rsp) -; Inst 8: movdqu %xmm11, 80(%rsp) -; Inst 9: movdqu %xmm12, 96(%rsp) -; Inst 10: movdqu %xmm13, 112(%rsp) -; Inst 11: movdqu %xmm14, 128(%rsp) -; Inst 12: movdqu %xmm15, 144(%rsp) -; Inst 13: movq %rsi, 160(%rsp) -; Inst 14: movq %rdi, 168(%rsp) -; Inst 15: call *%rcx -; Inst 16: movdqu 0(%rsp), %xmm6 -; Inst 17: movdqu 16(%rsp), %xmm7 -; Inst 18: movdqu 32(%rsp), %xmm8 -; Inst 19: movdqu 48(%rsp), %xmm9 -; Inst 20: movdqu 64(%rsp), %xmm10 -; Inst 21: movdqu 80(%rsp), %xmm11 -; Inst 22: movdqu 96(%rsp), %xmm12 -; Inst 23: movdqu 112(%rsp), %xmm13 -; Inst 24: movdqu 128(%rsp), %xmm14 -; Inst 25: movdqu 144(%rsp), %xmm15 -; Inst 26: movq 160(%rsp), %rsi -; Inst 27: movq 168(%rsp), %rdi -; Inst 28: addq $176, %rsp -; Inst 29: movq %rbp, %rsp -; Inst 30: popq %rbp -; Inst 31: ret -; }} +; pushq %rbp +; movq %rsp, %rbp +; subq %rsp, $176, %rsp +; movq %rsi, 0(%rsp) +; movq %rdi, 8(%rsp) +; movdqu %xmm6, 16(%rsp) +; movdqu %xmm7, 32(%rsp) +; movdqu %xmm8, 48(%rsp) +; movdqu %xmm9, 64(%rsp) +; movdqu %xmm10, 80(%rsp) +; movdqu %xmm11, 96(%rsp) +; movdqu %xmm12, 112(%rsp) +; movdqu %xmm13, 128(%rsp) +; movdqu %xmm14, 144(%rsp) +; movdqu %xmm15, 160(%rsp) +; block0: +; call *%rcx +; movq 0(%rsp), %rsi +; movq 8(%rsp), %rdi +; movdqu 16(%rsp), %xmm6 +; movdqu 32(%rsp), %xmm7 +; movdqu 48(%rsp), %xmm8 +; movdqu 64(%rsp), %xmm9 +; movdqu 80(%rsp), %xmm10 +; movdqu 96(%rsp), %xmm11 +; movdqu 112(%rsp), %xmm12 +; movdqu 128(%rsp), %xmm13 +; movdqu 144(%rsp), %xmm14 +; movdqu 160(%rsp), %xmm15 +; addq %rsp, $176, %rsp +; movq %rbp, %rsp +; popq %rbp +; ret function %many_args( ;; rdi, rsi, rdx, rcx, r8, r9, @@ -139,56 +127,58 @@ block0( return } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 
44) -; Inst 0: pushq %rbp -; Inst 1: movq %rsp, %rbp -; Inst 2: subq $32, %rsp -; Inst 3: movq %r12, 0(%rsp) -; Inst 4: movq %r13, 8(%rsp) -; Inst 5: movq %r14, 16(%rsp) -; Inst 6: movq %rdx, %rax -; Inst 7: movq %rcx, %r10 -; Inst 8: movq %r8, %r11 -; Inst 9: movq %r9, %r12 -; Inst 10: movq 16(%rbp), %r13 -; Inst 11: movq 24(%rbp), %r14 -; Inst 12: movss 32(%rbp), %xmm8 -; Inst 13: movsd 40(%rbp), %xmm9 -; Inst 14: subq $144, %rsp -; Inst 15: virtual_sp_offset_adjust 144 -; Inst 16: movq %rdi, %rcx -; Inst 17: movq %rsi, %rdx -; Inst 18: movq %rax, %r8 -; Inst 19: movq %r10, %r9 -; Inst 20: movq %r11, 32(%rsp) -; Inst 21: movq %r12, 40(%rsp) -; Inst 22: movsd %xmm0, 48(%rsp) -; Inst 23: movsd %xmm1, 56(%rsp) -; Inst 24: movsd %xmm2, 64(%rsp) -; Inst 25: movsd %xmm3, 72(%rsp) -; Inst 26: movsd %xmm4, 80(%rsp) -; Inst 27: movsd %xmm5, 88(%rsp) -; Inst 28: movsd %xmm6, 96(%rsp) -; Inst 29: movsd %xmm7, 104(%rsp) -; Inst 30: movq %r13, 112(%rsp) -; Inst 31: movl %r14d, 120(%rsp) -; Inst 32: movss %xmm8, 128(%rsp) -; Inst 33: movsd %xmm9, 136(%rsp) -; Inst 34: call *%rdi -; Inst 35: addq $144, %rsp -; Inst 36: virtual_sp_offset_adjust -144 -; Inst 37: movq 0(%rsp), %r12 -; Inst 38: movq 8(%rsp), %r13 -; Inst 39: movq 16(%rsp), %r14 -; Inst 40: addq $32, %rsp -; Inst 41: movq %rbp, %rsp -; Inst 42: popq %rbp -; Inst 43: ret -; }} +; pushq %rbp +; movq %rsp, %rbp +; subq %rsp, $96, %rsp +; movq %rbx, 48(%rsp) +; movq %r12, 56(%rsp) +; movq %r13, 64(%rsp) +; movq %r14, 72(%rsp) +; movq %r15, 80(%rsp) +; block0: +; movq %rsi, %rbx +; movq %rdx, %r14 +; movq %rcx, %r12 +; movq %r8, %r13 +; movq %r9, %r15 +; movq 16(%rbp), %rax +; movq 24(%rbp), %r11 +; movss 32(%rbp), %xmm9 +; movsd 40(%rbp), %xmm8 +; subq %rsp, $144, %rsp +; virtual_sp_offset_adjust 144 +; movq %rdi, %rcx +; movq %rbx, %rdx +; movq %r14, %r8 +; movq %r12, %r9 +; movq %r13, %rsi +; movq %rsi, 32(%rsp) +; movq %r15, %rsi +; movq %rsi, 40(%rsp) +; movsd %xmm0, 48(%rsp) +; movsd %xmm1, 56(%rsp) +; movsd %xmm2, 64(%rsp) +; movsd %xmm3, 72(%rsp) +; movsd %xmm4, 80(%rsp) +; movsd %xmm5, 88(%rsp) +; movsd %xmm6, 96(%rsp) +; movsd %xmm7, 104(%rsp) +; movq %rax, 112(%rsp) +; movl %r11d, 120(%rsp) +; movss %xmm9, 128(%rsp) +; movsd %xmm8, 136(%rsp) +; call *%rdi +; addq %rsp, $144, %rsp +; virtual_sp_offset_adjust -144 +; movq 48(%rsp), %rbx +; movq 56(%rsp), %r12 +; movq 64(%rsp), %r13 +; movq 72(%rsp), %r14 +; movq 80(%rsp), %r15 +; addq %rsp, $96, %rsp +; movq %rbp, %rsp +; popq %rbp +; ret function %many_ints(i64, i64, i64, i64, i64) system_v { ;; rdi => rcx @@ -202,29 +192,31 @@ block0(v0: i64, v1:i64, v2:i64, v3:i64, v4:i64): return } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 
17) -; Inst 0: pushq %rbp -; Inst 1: movq %rsp, %rbp -; Inst 2: movq %rdx, %rax -; Inst 3: movq %rcx, %r9 -; Inst 4: movq %r8, %r10 -; Inst 5: subq $48, %rsp -; Inst 6: virtual_sp_offset_adjust 48 -; Inst 7: movq %rdi, %rcx -; Inst 8: movq %rsi, %rdx -; Inst 9: movq %rax, %r8 -; Inst 10: movq %r10, 32(%rsp) -; Inst 11: call *%rdi -; Inst 12: addq $48, %rsp -; Inst 13: virtual_sp_offset_adjust -48 -; Inst 14: movq %rbp, %rsp -; Inst 15: popq %rbp -; Inst 16: ret -; }} +; pushq %rbp +; movq %rsp, %rbp +; subq %rsp, $16, %rsp +; movq %rbx, 0(%rsp) +; block0: +; movq %rsi, %r11 +; movq %rdx, %r9 +; movq %rcx, %rax +; movq %r8, %rbx +; subq %rsp, $48, %rsp +; virtual_sp_offset_adjust 48 +; movq %rdi, %rcx +; movq %r11, %rdx +; movq %r9, %r8 +; movq %rax, %r9 +; movq %rbx, %r11 +; movq %r11, 32(%rsp) +; call *%rdi +; addq %rsp, $48, %rsp +; virtual_sp_offset_adjust -48 +; movq 0(%rsp), %rbx +; addq %rsp, $16, %rsp +; movq %rbp, %rsp +; popq %rbp +; ret function %many_args2(i32, f32, i64, f64, i32, i32, i32, f32, f64, f32, f64) system_v { sig0 = (i32, f32, i64, f64, i32, i32, i32, f32, f64, f32, f64) windows_fastcall @@ -233,37 +225,37 @@ block0(v0: i32, v1: f32, v2: i64, v3: f64, v4: i32, v5: i32, v6: i32, v7: f32, v return } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 25) -; Inst 0: pushq %rbp -; Inst 1: movq %rsp, %rbp -; Inst 2: movaps %xmm1, %xmm6 -; Inst 3: movq %rcx, %rax -; Inst 4: movq %r8, %r9 -; Inst 5: movaps %xmm3, %xmm7 -; Inst 6: subq $96, %rsp -; Inst 7: virtual_sp_offset_adjust 96 -; Inst 8: movq %rdi, %rcx -; Inst 9: movaps %xmm0, %xmm1 -; Inst 10: movq %rsi, %r8 -; Inst 11: movaps %xmm6, %xmm3 -; Inst 12: movl %edx, 32(%rsp) -; Inst 13: movl %eax, 40(%rsp) -; Inst 14: movl %r9d, 48(%rsp) -; Inst 15: movss %xmm2, 56(%rsp) -; Inst 16: movsd %xmm7, 64(%rsp) -; Inst 17: movss %xmm4, 72(%rsp) -; Inst 18: movsd %xmm5, 80(%rsp) -; Inst 19: call *%rdi -; Inst 20: addq $96, %rsp -; Inst 21: virtual_sp_offset_adjust -96 -; Inst 22: movq %rbp, %rsp -; Inst 23: popq %rbp -; Inst 24: ret -; }} +; pushq %rbp +; movq %rsp, %rbp +; block0: +; movdqa %xmm0, %xmm6 +; movq %rsi, %rax +; movdqa %xmm1, %xmm14 +; movq %rcx, %r11 +; movq %r8, %r9 +; movdqa %xmm3, %xmm10 +; subq %rsp, $96, %rsp +; virtual_sp_offset_adjust 96 +; movq %rdi, %rcx +; movdqa %xmm6, %xmm1 +; movq %rax, %r8 +; movdqa %xmm14, %xmm3 +; movl %edx, 32(%rsp) +; movq %r11, %rdx +; movl %edx, 40(%rsp) +; movq %r9, %rax +; movl %eax, 48(%rsp) +; movss %xmm2, 56(%rsp) +; movdqa %xmm10, %xmm2 +; movsd %xmm2, 64(%rsp) +; movss %xmm4, 72(%rsp) +; movsd %xmm5, 80(%rsp) +; call *%rdi +; addq %rsp, $96, %rsp +; virtual_sp_offset_adjust -96 +; movq %rbp, %rsp +; popq %rbp +; ret function %wasmtime_mix1(i32) wasmtime_system_v { sig0 = (i32) system_v @@ -272,20 +264,14 @@ block0(v0: i32): return } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 8) -; Inst 0: pushq %rbp -; Inst 1: movq %rsp, %rbp -; Inst 2: movq %rdi, %rsi -; Inst 3: movq %rsi, %rdi -; Inst 4: call *%rsi -; Inst 5: movq %rbp, %rsp -; Inst 6: popq %rbp -; Inst 7: ret -; }} +; pushq %rbp +; movq %rsp, %rbp +; block0: +; movq %rdi, %rcx +; call *%rcx +; movq %rbp, %rsp +; popq %rbp +; ret function %wasmtime_mix2(i32) system_v { sig0 = (i32) wasmtime_system_v @@ -294,20 +280,14 @@ block0(v0: i32): return } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 
8) -; Inst 0: pushq %rbp -; Inst 1: movq %rsp, %rbp -; Inst 2: movq %rdi, %rsi -; Inst 3: movq %rsi, %rdi -; Inst 4: call *%rsi -; Inst 5: movq %rbp, %rsp -; Inst 6: popq %rbp -; Inst 7: ret -; }} +; pushq %rbp +; movq %rsp, %rbp +; block0: +; movq %rdi, %rcx +; call *%rcx +; movq %rbp, %rsp +; popq %rbp +; ret function %wasmtime_mix2() -> i32, i32 system_v { sig0 = () -> i32, i32 wasmtime_system_v @@ -317,26 +297,20 @@ block0: return v0, v1 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 14) -; Inst 0: pushq %rbp -; Inst 1: movq %rsp, %rbp -; Inst 2: movl $1, %esi -; Inst 3: subq $16, %rsp -; Inst 4: virtual_sp_offset_adjust 16 -; Inst 5: lea 0(%rsp), %rdi -; Inst 6: call *%rsi -; Inst 7: movq 0(%rsp), %rsi -; Inst 8: addq $16, %rsp -; Inst 9: virtual_sp_offset_adjust -16 -; Inst 10: movq %rsi, %rdx -; Inst 11: movq %rbp, %rsp -; Inst 12: popq %rbp -; Inst 13: ret -; }} +; pushq %rbp +; movq %rsp, %rbp +; block0: +; movl $1, %edx +; subq %rsp, $16, %rsp +; virtual_sp_offset_adjust 16 +; lea 0(%rsp), %rdi +; call *%rdx +; movq 0(%rsp), %rdx +; addq %rsp, $16, %rsp +; virtual_sp_offset_adjust -16 +; movq %rbp, %rsp +; popq %rbp +; ret function %wasmtime_mix3() -> i32, i32 wasmtime_system_v { sig0 = () -> i32, i32 system_v @@ -346,25 +320,21 @@ block0: return v0, v1 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 13) -; Inst 0: pushq %rbp -; Inst 1: movq %rsp, %rbp -; Inst 2: subq $16, %rsp -; Inst 3: movq %r12, 0(%rsp) -; Inst 4: movq %rdi, %r12 -; Inst 5: movl $1, %esi -; Inst 6: call *%rsi -; Inst 7: movl %edx, 0(%r12) -; Inst 8: movq 0(%rsp), %r12 -; Inst 9: addq $16, %rsp -; Inst 10: movq %rbp, %rsp -; Inst 11: popq %rbp -; Inst 12: ret -; }} +; pushq %rbp +; movq %rsp, %rbp +; subq %rsp, $16, %rsp +; movq %r13, 0(%rsp) +; block0: +; movq %rdi, %r13 +; movl $1, %r9d +; call *%r9 +; movq %r13, %rdi +; movl %edx, 0(%rdi) +; movq 0(%rsp), %r13 +; addq %rsp, $16, %rsp +; movq %rbp, %rsp +; popq %rbp +; ret function %wasmtime_mix4() -> i32, i64, i32 wasmtime_system_v { sig0 = () -> i32, i64, i32 system_v @@ -374,32 +344,28 @@ block0: return v0, v1, v2 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 
20) -; Inst 0: pushq %rbp -; Inst 1: movq %rsp, %rbp -; Inst 2: subq $16, %rsp -; Inst 3: movq %r12, 0(%rsp) -; Inst 4: movq %rdi, %r12 -; Inst 5: movl $1, %esi -; Inst 6: subq $16, %rsp -; Inst 7: virtual_sp_offset_adjust 16 -; Inst 8: lea 0(%rsp), %rdi -; Inst 9: call *%rsi -; Inst 10: movq 0(%rsp), %rsi -; Inst 11: addq $16, %rsp -; Inst 12: virtual_sp_offset_adjust -16 -; Inst 13: movq %rdx, 0(%r12) -; Inst 14: movl %esi, 8(%r12) -; Inst 15: movq 0(%rsp), %r12 -; Inst 16: addq $16, %rsp -; Inst 17: movq %rbp, %rsp -; Inst 18: popq %rbp -; Inst 19: ret -; }} +; pushq %rbp +; movq %rsp, %rbp +; subq %rsp, $16, %rsp +; movq %rbx, 0(%rsp) +; block0: +; movq %rdi, %rbx +; movl $1, %esi +; subq %rsp, $16, %rsp +; virtual_sp_offset_adjust 16 +; lea 0(%rsp), %rdi +; call *%rsi +; movq 0(%rsp), %rcx +; addq %rsp, $16, %rsp +; virtual_sp_offset_adjust -16 +; movq %rbx, %rdi +; movq %rdx, 0(%rdi) +; movl %ecx, 8(%rdi) +; movq 0(%rsp), %rbx +; addq %rsp, $16, %rsp +; movq %rbp, %rsp +; popq %rbp +; ret function %wasmtime_mix5() -> f32, i64, i32, f32 wasmtime_system_v { sig0 = () -> f32, i64, i32, f32 system_v @@ -409,27 +375,23 @@ block0: return v0, v1, v2, v3 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 15) -; Inst 0: pushq %rbp -; Inst 1: movq %rsp, %rbp -; Inst 2: subq $16, %rsp -; Inst 3: movq %r12, 0(%rsp) -; Inst 4: movq %rdi, %r12 -; Inst 5: movl $1, %esi -; Inst 6: call *%rsi -; Inst 7: movq %rax, 0(%r12) -; Inst 8: movl %edx, 8(%r12) -; Inst 9: movss %xmm1, 12(%r12) -; Inst 10: movq 0(%rsp), %r12 -; Inst 11: addq $16, %rsp -; Inst 12: movq %rbp, %rsp -; Inst 13: popq %rbp -; Inst 14: ret -; }} +; pushq %rbp +; movq %rsp, %rbp +; subq %rsp, $16, %rsp +; movq %r12, 0(%rsp) +; block0: +; movq %rdi, %r12 +; movl $1, %eax +; call *%rax +; movq %r12, %rdi +; movq %rax, 0(%rdi) +; movl %edx, 8(%rdi) +; movss %xmm1, 12(%rdi) +; movq 0(%rsp), %r12 +; addq %rsp, $16, %rsp +; movq %rbp, %rsp +; popq %rbp +; ret function %wasmtime_mix6(f32, i64, i32, f32) -> f32, i64, i32, f32 wasmtime_system_v { sig0 = (f32, i64, i32, f32) -> f32, i64, i32, f32 system_v @@ -439,25 +401,21 @@ block0(v0: f32, v1: i64, v2: i32, v3: f32): return v5, v6, v7, v8 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 15) -; Inst 0: pushq %rbp -; Inst 1: movq %rsp, %rbp -; Inst 2: subq $16, %rsp -; Inst 3: movq %r12, 0(%rsp) -; Inst 4: movq %rdx, %r12 -; Inst 5: movl $1, %eax -; Inst 6: call *%rax -; Inst 7: movq %rax, 0(%r12) -; Inst 8: movl %edx, 8(%r12) -; Inst 9: movss %xmm1, 12(%r12) -; Inst 10: movq 0(%rsp), %r12 -; Inst 11: addq $16, %rsp -; Inst 12: movq %rbp, %rsp -; Inst 13: popq %rbp -; Inst 14: ret -; }} +; pushq %rbp +; movq %rsp, %rbp +; subq %rsp, $16, %rsp +; movq %rbx, 0(%rsp) +; block0: +; movq %rdx, %rbx +; movl $1, %eax +; call *%rax +; movq %rbx, %rcx +; movq %rax, 0(%rcx) +; movl %edx, 8(%rcx) +; movss %xmm1, 12(%rcx) +; movq 0(%rsp), %rbx +; addq %rsp, $16, %rsp +; movq %rbp, %rsp +; popq %rbp +; ret diff --git a/cranelift/filetests/filetests/isa/x64/clz-lzcnt.clif b/cranelift/filetests/filetests/isa/x64/clz-lzcnt.clif index fcf828607f..38a42e95b4 100644 --- a/cranelift/filetests/filetests/isa/x64/clz-lzcnt.clif +++ b/cranelift/filetests/filetests/isa/x64/clz-lzcnt.clif @@ -7,19 +7,13 @@ block0(v0: i64): return v1 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 
7) -; Inst 0: pushq %rbp -; Inst 1: movq %rsp, %rbp -; Inst 2: lzcntq %rdi, %rsi -; Inst 3: movq %rsi, %rax -; Inst 4: movq %rbp, %rsp -; Inst 5: popq %rbp -; Inst 6: ret -; }} +; pushq %rbp +; movq %rsp, %rbp +; block0: +; lzcntq %rdi, %rax +; movq %rbp, %rsp +; popq %rbp +; ret function %clz(i32) -> i32 { block0(v0: i32): @@ -27,17 +21,11 @@ block0(v0: i32): return v1 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 7) -; Inst 0: pushq %rbp -; Inst 1: movq %rsp, %rbp -; Inst 2: lzcntl %edi, %esi -; Inst 3: movq %rsi, %rax -; Inst 4: movq %rbp, %rsp -; Inst 5: popq %rbp -; Inst 6: ret -; }} +; pushq %rbp +; movq %rsp, %rbp +; block0: +; lzcntl %edi, %eax +; movq %rbp, %rsp +; popq %rbp +; ret diff --git a/cranelift/filetests/filetests/isa/x64/cmp-mem-bug.clif b/cranelift/filetests/filetests/isa/x64/cmp-mem-bug.clif index bb60205667..ffa1a37d60 100644 --- a/cranelift/filetests/filetests/isa/x64/cmp-mem-bug.clif +++ b/cranelift/filetests/filetests/isa/x64/cmp-mem-bug.clif @@ -10,25 +10,19 @@ block0(v0: i64, v1: i64): return v4, v5 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 13) -; Inst 0: pushq %rbp -; Inst 1: movq %rsp, %rbp -; Inst 2: movq 0(%rsi), %rax -; Inst 3: cmpq %rax, %rdi -; Inst 4: setz %cl -; Inst 5: andq $1, %rcx -; Inst 6: cmpq %rax, %rdi -; Inst 7: cmovzq %rdi, %rsi -; Inst 8: movq %rcx, %rax -; Inst 9: movq %rsi, %rdx -; Inst 10: movq %rbp, %rsp -; Inst 11: popq %rbp -; Inst 12: ret -; }} +; pushq %rbp +; movq %rsp, %rbp +; block0: +; movq 0(%rsi), %rcx +; cmpq %rcx, %rdi +; setz %al +; andq %rax, $1, %rax +; cmpq %rcx, %rdi +; cmovzq %rdi, %rsi, %rsi +; movq %rsi, %rdx +; movq %rbp, %rsp +; popq %rbp +; ret function %f1(f64, i64) -> i64, f64 { block0(v0: f64, v1: i64): @@ -39,27 +33,20 @@ block0(v0: f64, v1: i64): return v4, v5 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 17) -; Inst 0: pushq %rbp -; Inst 1: movq %rsp, %rbp -; Inst 2: movsd 0(%rdi), %xmm1 -; Inst 3: ucomisd %xmm1, %xmm0 -; Inst 4: setnp %sil -; Inst 5: setz %dil -; Inst 6: andl %edi, %esi -; Inst 7: andq $1, %rsi -; Inst 8: ucomisd %xmm0, %xmm1 -; Inst 9: movaps %xmm0, %xmm1 -; Inst 10: jz $next; movsd %xmm0, %xmm1; $next: -; Inst 11: jnp $next; movsd %xmm0, %xmm1; $next: -; Inst 12: movq %rsi, %rax -; Inst 13: movaps %xmm1, %xmm0 -; Inst 14: movq %rbp, %rsp -; Inst 15: popq %rbp -; Inst 16: ret -; }} +; pushq %rbp +; movq %rsp, %rbp +; block0: +; movsd 0(%rdi), %xmm12 +; ucomisd %xmm12, %xmm0 +; setnp %al +; setz %r8b +; andl %eax, %r8d, %eax +; andq %rax, $1, %rax +; ucomisd %xmm0, %xmm12 +; movdqa %xmm0, %xmm6 +; mov z, sd; j%xmm6 $next; mov%xmm0 %xmm0, %xmm0; $next: +; mov np, sd; j%xmm6 $next; mov%xmm0 %xmm0, %xmm0; $next: +; movq %rbp, %rsp +; popq %rbp +; ret diff --git a/cranelift/filetests/filetests/isa/x64/ctz-bmi1.clif b/cranelift/filetests/filetests/isa/x64/ctz-bmi1.clif index ab96df7607..e0ff8122bb 100644 --- a/cranelift/filetests/filetests/isa/x64/ctz-bmi1.clif +++ b/cranelift/filetests/filetests/isa/x64/ctz-bmi1.clif @@ -7,19 +7,13 @@ block0(v0: i64): return v1 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 
7) -; Inst 0: pushq %rbp -; Inst 1: movq %rsp, %rbp -; Inst 2: tzcntq %rdi, %rsi -; Inst 3: movq %rsi, %rax -; Inst 4: movq %rbp, %rsp -; Inst 5: popq %rbp -; Inst 6: ret -; }} +; pushq %rbp +; movq %rsp, %rbp +; block0: +; tzcntq %rdi, %rax +; movq %rbp, %rsp +; popq %rbp +; ret function %ctz(i32) -> i32 { block0(v0: i32): @@ -27,17 +21,11 @@ block0(v0: i32): return v1 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 7) -; Inst 0: pushq %rbp -; Inst 1: movq %rsp, %rbp -; Inst 2: tzcntl %edi, %esi -; Inst 3: movq %rsi, %rax -; Inst 4: movq %rbp, %rsp -; Inst 5: popq %rbp -; Inst 6: ret -; }} +; pushq %rbp +; movq %rsp, %rbp +; block0: +; tzcntl %edi, %eax +; movq %rbp, %rsp +; popq %rbp +; ret diff --git a/cranelift/filetests/filetests/isa/x64/div-checks.clif b/cranelift/filetests/filetests/isa/x64/div-checks.clif index 5a366914a2..132f939818 100644 --- a/cranelift/filetests/filetests/isa/x64/div-checks.clif +++ b/cranelift/filetests/filetests/isa/x64/div-checks.clif @@ -12,8 +12,8 @@ block0(v0: i8, v1: i8): v2 = srem.i8 v0, v1 ; check: movq %rdi, %rax ; nextln: movl $$0, %edx -; nextln: srem $$rax:$$rdx, %sil -; nextln: shrq $$8, %rax +; nextln: srem_seq %al, %dl, %sil, %al, %dl, tmp=(none) +; nextln: shrq $$8, %rax, %rax return v2 } @@ -23,7 +23,7 @@ block0(v0: i16, v1: i16): v2 = srem.i16 v0, v1 ; check: movq %rdi, %rax ; nextln: movl $$0, %edx -; nextln: srem $$rax:$$rdx, %si +; nextln: srem_seq %ax, %dx, %si, %ax, %dx, tmp=(none) ; nextln: movq %rdx, %rax return v2 @@ -34,7 +34,7 @@ block0(v0: i32, v1: i32): v2 = srem.i32 v0, v1 ; check: movq %rdi, %rax ; nextln: movl $$0, %edx -; nextln: srem $$rax:$$rdx, %esi +; nextln: srem_seq %eax, %edx, %esi, %eax, %edx, tmp=(none) ; nextln: movq %rdx, %rax return v2 @@ -45,7 +45,7 @@ block0(v0: i64, v1: i64): v2 = srem.i64 v0, v1 ; check: movq %rdi, %rax ; nextln: movl $$0, %edx -; nextln: srem $$rax:$$rdx, %rsi +; nextln: srem_seq %rax, %rdx, %rsi, %rax, %rdx, tmp=(none) ; nextln: movq %rdx, %rax return v2 diff --git a/cranelift/filetests/filetests/isa/x64/fastcall.clif b/cranelift/filetests/filetests/isa/x64/fastcall.clif index 521a142618..8300606476 100644 --- a/cranelift/filetests/filetests/isa/x64/fastcall.clif +++ b/cranelift/filetests/filetests/isa/x64/fastcall.clif @@ -8,120 +8,90 @@ block0(v0: i64, v1: i64, v2: i64, v3: i64): return v0 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 8) -; Inst 0: pushq %rbp -; Inst 1: unwind PushFrameRegs { offset_upward_to_caller_sp: 16 } -; Inst 2: movq %rsp, %rbp -; Inst 3: unwind DefineNewFrame { offset_upward_to_caller_sp: 16, offset_downward_to_clobbers: 0 } -; Inst 4: movq %rcx, %rax -; Inst 5: movq %rbp, %rsp -; Inst 6: popq %rbp -; Inst 7: ret -; }} +; pushq %rbp +; unwind PushFrameRegs { offset_upward_to_caller_sp: 16 } +; movq %rsp, %rbp +; unwind DefineNewFrame { offset_upward_to_caller_sp: 16, offset_downward_to_clobbers: 0 } +; block0: +; movq %rcx, %rax +; movq %rbp, %rsp +; popq %rbp +; ret function %f1(i64, i64, i64, i64) -> i64 windows_fastcall { block0(v0: i64, v1: i64, v2: i64, v3: i64): return v1 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 
8) -; Inst 0: pushq %rbp -; Inst 1: unwind PushFrameRegs { offset_upward_to_caller_sp: 16 } -; Inst 2: movq %rsp, %rbp -; Inst 3: unwind DefineNewFrame { offset_upward_to_caller_sp: 16, offset_downward_to_clobbers: 0 } -; Inst 4: movq %rdx, %rax -; Inst 5: movq %rbp, %rsp -; Inst 6: popq %rbp -; Inst 7: ret -; }} +; pushq %rbp +; unwind PushFrameRegs { offset_upward_to_caller_sp: 16 } +; movq %rsp, %rbp +; unwind DefineNewFrame { offset_upward_to_caller_sp: 16, offset_downward_to_clobbers: 0 } +; block0: +; movq %rdx, %rax +; movq %rbp, %rsp +; popq %rbp +; ret function %f2(i64, i64, i64, i64) -> i64 windows_fastcall { block0(v0: i64, v1: i64, v2: i64, v3: i64): return v2 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 8) -; Inst 0: pushq %rbp -; Inst 1: unwind PushFrameRegs { offset_upward_to_caller_sp: 16 } -; Inst 2: movq %rsp, %rbp -; Inst 3: unwind DefineNewFrame { offset_upward_to_caller_sp: 16, offset_downward_to_clobbers: 0 } -; Inst 4: movq %r8, %rax -; Inst 5: movq %rbp, %rsp -; Inst 6: popq %rbp -; Inst 7: ret -; }} +; pushq %rbp +; unwind PushFrameRegs { offset_upward_to_caller_sp: 16 } +; movq %rsp, %rbp +; unwind DefineNewFrame { offset_upward_to_caller_sp: 16, offset_downward_to_clobbers: 0 } +; block0: +; movq %r8, %rax +; movq %rbp, %rsp +; popq %rbp +; ret function %f3(i64, i64, i64, i64) -> i64 windows_fastcall { block0(v0: i64, v1: i64, v2: i64, v3: i64): return v3 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 8) -; Inst 0: pushq %rbp -; Inst 1: unwind PushFrameRegs { offset_upward_to_caller_sp: 16 } -; Inst 2: movq %rsp, %rbp -; Inst 3: unwind DefineNewFrame { offset_upward_to_caller_sp: 16, offset_downward_to_clobbers: 0 } -; Inst 4: movq %r9, %rax -; Inst 5: movq %rbp, %rsp -; Inst 6: popq %rbp -; Inst 7: ret -; }} +; pushq %rbp +; unwind PushFrameRegs { offset_upward_to_caller_sp: 16 } +; movq %rsp, %rbp +; unwind DefineNewFrame { offset_upward_to_caller_sp: 16, offset_downward_to_clobbers: 0 } +; block0: +; movq %r9, %rax +; movq %rbp, %rsp +; popq %rbp +; ret function %f4(i64, i64, f64, i64) -> f64 windows_fastcall { block0(v0: i64, v1: i64, v2: f64, v3: i64): return v2 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 8) -; Inst 0: pushq %rbp -; Inst 1: unwind PushFrameRegs { offset_upward_to_caller_sp: 16 } -; Inst 2: movq %rsp, %rbp -; Inst 3: unwind DefineNewFrame { offset_upward_to_caller_sp: 16, offset_downward_to_clobbers: 0 } -; Inst 4: movaps %xmm2, %xmm0 -; Inst 5: movq %rbp, %rsp -; Inst 6: popq %rbp -; Inst 7: ret -; }} +; pushq %rbp +; unwind PushFrameRegs { offset_upward_to_caller_sp: 16 } +; movq %rsp, %rbp +; unwind DefineNewFrame { offset_upward_to_caller_sp: 16, offset_downward_to_clobbers: 0 } +; block0: +; movdqa %xmm2, %xmm0 +; movq %rbp, %rsp +; popq %rbp +; ret function %f5(i64, i64, f64, i64) -> i64 windows_fastcall { block0(v0: i64, v1: i64, v2: f64, v3: i64): return v3 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 
8) -; Inst 0: pushq %rbp -; Inst 1: unwind PushFrameRegs { offset_upward_to_caller_sp: 16 } -; Inst 2: movq %rsp, %rbp -; Inst 3: unwind DefineNewFrame { offset_upward_to_caller_sp: 16, offset_downward_to_clobbers: 0 } -; Inst 4: movq %r9, %rax -; Inst 5: movq %rbp, %rsp -; Inst 6: popq %rbp -; Inst 7: ret -; }} +; pushq %rbp +; unwind PushFrameRegs { offset_upward_to_caller_sp: 16 } +; movq %rsp, %rbp +; unwind DefineNewFrame { offset_upward_to_caller_sp: 16, offset_downward_to_clobbers: 0 } +; block0: +; movq %r9, %rax +; movq %rbp, %rsp +; popq %rbp +; ret function %f6(i64, i64, i64, i64, i64, i64) -> i64 windows_fastcall { block0(v0: i64, v1: i64, v2: i64, v3: i64, v4: i64, v5: i64): @@ -138,59 +108,33 @@ block0(v0: i64, v1: i64, v2: i64, v3: i64, v4: i64, v5: i64): ;; TODO(#2704): fix regalloc's register priority ordering! } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 15) -; Inst 0: pushq %rbp -; Inst 1: unwind PushFrameRegs { offset_upward_to_caller_sp: 16 } -; Inst 2: movq %rsp, %rbp -; Inst 3: unwind DefineNewFrame { offset_upward_to_caller_sp: 16, offset_downward_to_clobbers: 16 } -; Inst 4: subq $16, %rsp -; Inst 5: movq %rsi, 0(%rsp) -; Inst 6: unwind SaveReg { clobber_offset: 0, reg: r16J } -; Inst 7: movq 48(%rbp), %rsi -; Inst 8: movq 56(%rbp), %rsi -; Inst 9: movq %rsi, %rax -; Inst 10: movq 0(%rsp), %rsi -; Inst 11: addq $16, %rsp -; Inst 12: movq %rbp, %rsp -; Inst 13: popq %rbp -; Inst 14: ret -; }} +; pushq %rbp +; unwind PushFrameRegs { offset_upward_to_caller_sp: 16 } +; movq %rsp, %rbp +; unwind DefineNewFrame { offset_upward_to_caller_sp: 16, offset_downward_to_clobbers: 0 } +; block0: +; movq 48(%rbp), %r11 +; movq 56(%rbp), %rax +; movq %rbp, %rsp +; popq %rbp +; ret function %f7(i128, i64, i128, i128) -> i128 windows_fastcall { block0(v0: i128, v1: i64, v2: i128, v3: i128): return v3 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 20) -; Inst 0: pushq %rbp -; Inst 1: unwind PushFrameRegs { offset_upward_to_caller_sp: 16 } -; Inst 2: movq %rsp, %rbp -; Inst 3: unwind DefineNewFrame { offset_upward_to_caller_sp: 16, offset_downward_to_clobbers: 16 } -; Inst 4: subq $16, %rsp -; Inst 5: movq %rsi, 0(%rsp) -; Inst 6: unwind SaveReg { clobber_offset: 0, reg: r16J } -; Inst 7: movq %rdi, 8(%rsp) -; Inst 8: unwind SaveReg { clobber_offset: 8, reg: r17J } -; Inst 9: movq 48(%rbp), %rsi -; Inst 10: movq 56(%rbp), %rsi -; Inst 11: movq 64(%rbp), %rdi -; Inst 12: movq %rsi, %rax -; Inst 13: movq %rdi, %rdx -; Inst 14: movq 0(%rsp), %rsi -; Inst 15: movq 8(%rsp), %rdi -; Inst 16: addq $16, %rsp -; Inst 17: movq %rbp, %rsp -; Inst 18: popq %rbp -; Inst 19: ret -; }} +; pushq %rbp +; unwind PushFrameRegs { offset_upward_to_caller_sp: 16 } +; movq %rsp, %rbp +; unwind DefineNewFrame { offset_upward_to_caller_sp: 16, offset_downward_to_clobbers: 0 } +; block0: +; movq 48(%rbp), %r11 +; movq 56(%rbp), %rax +; movq 64(%rbp), %rdx +; movq %rbp, %rsp +; popq %rbp +; ret function %f8(i64) -> i64 windows_fastcall { sig0 = (i64, i64, f64, f64, i64, i64) -> i64 windows_fastcall @@ -202,37 +146,26 @@ block0(v0: i64): return v2 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 
25) -; Inst 0: pushq %rbp -; Inst 1: unwind PushFrameRegs { offset_upward_to_caller_sp: 16 } -; Inst 2: movq %rsp, %rbp -; Inst 3: unwind DefineNewFrame { offset_upward_to_caller_sp: 16, offset_downward_to_clobbers: 16 } -; Inst 4: subq $16, %rsp -; Inst 5: movq %rsi, 0(%rsp) -; Inst 6: unwind SaveReg { clobber_offset: 0, reg: r16J } -; Inst 7: movq %rcx, %rsi -; Inst 8: cvtsi2sd %rsi, %xmm3 -; Inst 9: subq $48, %rsp -; Inst 10: virtual_sp_offset_adjust 48 -; Inst 11: movq %rsi, %rcx -; Inst 12: movq %rsi, %rdx -; Inst 13: movaps %xmm3, %xmm2 -; Inst 14: movq %rsi, 32(%rsp) -; Inst 15: movq %rsi, 40(%rsp) -; Inst 16: load_ext_name %g+0, %rsi -; Inst 17: call *%rsi -; Inst 18: addq $48, %rsp -; Inst 19: virtual_sp_offset_adjust -48 -; Inst 20: movq 0(%rsp), %rsi -; Inst 21: addq $16, %rsp -; Inst 22: movq %rbp, %rsp -; Inst 23: popq %rbp -; Inst 24: ret -; }} +; pushq %rbp +; unwind PushFrameRegs { offset_upward_to_caller_sp: 16 } +; movq %rsp, %rbp +; unwind DefineNewFrame { offset_upward_to_caller_sp: 16, offset_downward_to_clobbers: 0 } +; block0: +; cvtsi2sd %rcx, %xmm2 +; subq %rsp, $48, %rsp +; virtual_sp_offset_adjust 48 +; movq %rcx, %rdx +; movq %rdx, %r8 +; movdqa %xmm2, %xmm3 +; movq %r8, 32(%rsp) +; movq %r8, 40(%rsp) +; load_ext_name %g+0, %r9 +; call *%r9 +; addq %rsp, $48, %rsp +; virtual_sp_offset_adjust -48 +; movq %rbp, %rsp +; popq %rbp +; ret function %f9(i64) -> f64 windows_fastcall { block0(v0: i64): @@ -284,95 +217,91 @@ block0(v0: i64): return v39 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 85) -; Inst 0: pushq %rbp -; Inst 1: unwind PushFrameRegs { offset_upward_to_caller_sp: 16 } -; Inst 2: movq %rsp, %rbp -; Inst 3: unwind DefineNewFrame { offset_upward_to_caller_sp: 16, offset_downward_to_clobbers: 160 } -; Inst 4: subq $224, %rsp -; Inst 5: movdqu %xmm6, 64(%rsp) -; Inst 6: unwind SaveReg { clobber_offset: 0, reg: r6V } -; Inst 7: movdqu %xmm7, 80(%rsp) -; Inst 8: unwind SaveReg { clobber_offset: 16, reg: r7V } -; Inst 9: movdqu %xmm8, 96(%rsp) -; Inst 10: unwind SaveReg { clobber_offset: 32, reg: r8V } -; Inst 11: movdqu %xmm9, 112(%rsp) -; Inst 12: unwind SaveReg { clobber_offset: 48, reg: r9V } -; Inst 13: movdqu %xmm10, 128(%rsp) -; Inst 14: unwind SaveReg { clobber_offset: 64, reg: r10V } -; Inst 15: movdqu %xmm11, 144(%rsp) -; Inst 16: unwind SaveReg { clobber_offset: 80, reg: r11V } -; Inst 17: movdqu %xmm12, 160(%rsp) -; Inst 18: unwind SaveReg { clobber_offset: 96, reg: r12V } -; Inst 19: movdqu %xmm13, 176(%rsp) -; Inst 20: unwind SaveReg { clobber_offset: 112, reg: r13V } -; Inst 21: movdqu %xmm14, 192(%rsp) -; Inst 22: unwind SaveReg { clobber_offset: 128, reg: r14V } -; Inst 23: movdqu %xmm15, 208(%rsp) -; Inst 24: unwind SaveReg { clobber_offset: 144, reg: r15V } -; Inst 25: movsd 0(%rcx), %xmm0 -; Inst 26: movdqu %xmm0, rsp(48 + virtual offset) -; Inst 27: movsd 8(%rcx), %xmm1 -; Inst 28: movsd 16(%rcx), %xmm0 -; Inst 29: movdqu %xmm0, rsp(32 + virtual offset) -; Inst 30: movsd 24(%rcx), %xmm3 -; Inst 31: movsd 32(%rcx), %xmm4 -; Inst 32: movsd 40(%rcx), %xmm5 -; Inst 33: movsd 48(%rcx), %xmm6 -; Inst 34: movsd 56(%rcx), %xmm7 -; Inst 35: movsd 64(%rcx), %xmm8 -; Inst 36: movsd 72(%rcx), %xmm9 -; Inst 37: movsd 80(%rcx), %xmm10 -; Inst 38: movsd 88(%rcx), %xmm11 -; Inst 39: movsd 96(%rcx), %xmm12 -; Inst 40: movsd 104(%rcx), %xmm13 -; Inst 41: movsd 112(%rcx), %xmm14 -; Inst 42: movsd 120(%rcx), %xmm15 -; Inst 43: movsd 128(%rcx), %xmm2 -; Inst 44: movdqu %xmm2, rsp(0 + virtual 
offset) -; Inst 45: movsd 136(%rcx), %xmm2 -; Inst 46: movsd 144(%rcx), %xmm0 -; Inst 47: movdqu %xmm0, rsp(16 + virtual offset) -; Inst 48: movdqu rsp(48 + virtual offset), %xmm0 -; Inst 49: addsd %xmm1, %xmm0 -; Inst 50: movdqu rsp(32 + virtual offset), %xmm1 -; Inst 51: addsd %xmm3, %xmm1 -; Inst 52: addsd %xmm5, %xmm4 -; Inst 53: addsd %xmm7, %xmm6 -; Inst 54: addsd %xmm9, %xmm8 -; Inst 55: addsd %xmm11, %xmm10 -; Inst 56: addsd %xmm13, %xmm12 -; Inst 57: addsd %xmm15, %xmm14 -; Inst 58: movdqu rsp(0 + virtual offset), %xmm3 -; Inst 59: addsd %xmm2, %xmm3 -; Inst 60: movdqu rsp(16 + virtual offset), %xmm2 -; Inst 61: addsd 152(%rcx), %xmm2 -; Inst 62: addsd %xmm1, %xmm0 -; Inst 63: addsd %xmm6, %xmm4 -; Inst 64: addsd %xmm10, %xmm8 -; Inst 65: addsd %xmm14, %xmm12 -; Inst 66: addsd %xmm2, %xmm3 -; Inst 67: addsd %xmm4, %xmm0 -; Inst 68: addsd %xmm12, %xmm8 -; Inst 69: addsd %xmm8, %xmm0 -; Inst 70: addsd %xmm3, %xmm0 -; Inst 71: movdqu 64(%rsp), %xmm6 -; Inst 72: movdqu 80(%rsp), %xmm7 -; Inst 73: movdqu 96(%rsp), %xmm8 -; Inst 74: movdqu 112(%rsp), %xmm9 -; Inst 75: movdqu 128(%rsp), %xmm10 -; Inst 76: movdqu 144(%rsp), %xmm11 -; Inst 77: movdqu 160(%rsp), %xmm12 -; Inst 78: movdqu 176(%rsp), %xmm13 -; Inst 79: movdqu 192(%rsp), %xmm14 -; Inst 80: movdqu 208(%rsp), %xmm15 -; Inst 81: addq $224, %rsp -; Inst 82: movq %rbp, %rsp -; Inst 83: popq %rbp -; Inst 84: ret -; }} +; pushq %rbp +; unwind PushFrameRegs { offset_upward_to_caller_sp: 16 } +; movq %rsp, %rbp +; unwind DefineNewFrame { offset_upward_to_caller_sp: 16, offset_downward_to_clobbers: 144 } +; subq %rsp, $240, %rsp +; movdqu %xmm6, 96(%rsp) +; unwind SaveReg { clobber_offset: 0, reg: p6f } +; movdqu %xmm7, 112(%rsp) +; unwind SaveReg { clobber_offset: 16, reg: p7f } +; movdqu %xmm8, 128(%rsp) +; unwind SaveReg { clobber_offset: 32, reg: p8f } +; movdqu %xmm9, 144(%rsp) +; unwind SaveReg { clobber_offset: 48, reg: p9f } +; movdqu %xmm10, 160(%rsp) +; unwind SaveReg { clobber_offset: 64, reg: p10f } +; movdqu %xmm11, 176(%rsp) +; unwind SaveReg { clobber_offset: 80, reg: p11f } +; movdqu %xmm12, 192(%rsp) +; unwind SaveReg { clobber_offset: 96, reg: p12f } +; movdqu %xmm13, 208(%rsp) +; unwind SaveReg { clobber_offset: 112, reg: p13f } +; movdqu %xmm14, 224(%rsp) +; unwind SaveReg { clobber_offset: 128, reg: p14f } +; block0: +; movsd 0(%rcx), %xmm0 +; movsd 8(%rcx), %xmm12 +; movdqu %xmm12, rsp(80 + virtual offset) +; movsd 16(%rcx), %xmm6 +; movdqu %xmm6, rsp(0 + virtual offset) +; movsd 24(%rcx), %xmm2 +; movdqu %xmm2, rsp(64 + virtual offset) +; movsd 32(%rcx), %xmm14 +; movsd 40(%rcx), %xmm3 +; movdqu %xmm3, rsp(48 + virtual offset) +; movsd 48(%rcx), %xmm9 +; movsd 56(%rcx), %xmm7 +; movdqu %xmm7, rsp(32 + virtual offset) +; movsd 64(%rcx), %xmm13 +; movsd 72(%rcx), %xmm10 +; movdqu %xmm10, rsp(16 + virtual offset) +; movsd 80(%rcx), %xmm11 +; movsd 88(%rcx), %xmm10 +; movsd 96(%rcx), %xmm5 +; movsd 104(%rcx), %xmm12 +; movsd 112(%rcx), %xmm1 +; movsd 120(%rcx), %xmm2 +; movsd 128(%rcx), %xmm8 +; movsd 136(%rcx), %xmm3 +; movsd 144(%rcx), %xmm4 +; movdqu rsp(80 + virtual offset), %xmm6 +; addsd %xmm0, %xmm0, %xmm6 +; movdqu rsp(0 + virtual offset), %xmm6 +; movdqu rsp(64 + virtual offset), %xmm7 +; addsd %xmm6, %xmm6, %xmm7 +; movdqu rsp(48 + virtual offset), %xmm7 +; addsd %xmm14, %xmm14, %xmm7 +; movdqu rsp(32 + virtual offset), %xmm7 +; addsd %xmm9, %xmm9, %xmm7 +; movdqu rsp(16 + virtual offset), %xmm7 +; addsd %xmm13, %xmm13, %xmm7 +; addsd %xmm11, %xmm11, %xmm10 +; addsd %xmm5, %xmm5, %xmm12 +; addsd %xmm1, 
%xmm1, %xmm2 +; addsd %xmm8, %xmm8, %xmm3 +; addsd %xmm4, 152(%xmm4), %rcx +; addsd %xmm0, %xmm0, %xmm6 +; addsd %xmm14, %xmm14, %xmm9 +; addsd %xmm13, %xmm13, %xmm11 +; addsd %xmm5, %xmm5, %xmm1 +; addsd %xmm8, %xmm8, %xmm4 +; addsd %xmm0, %xmm0, %xmm14 +; addsd %xmm13, %xmm13, %xmm5 +; addsd %xmm0, %xmm0, %xmm13 +; addsd %xmm0, %xmm0, %xmm8 +; movdqu 96(%rsp), %xmm6 +; movdqu 112(%rsp), %xmm7 +; movdqu 128(%rsp), %xmm8 +; movdqu 144(%rsp), %xmm9 +; movdqu 160(%rsp), %xmm10 +; movdqu 176(%rsp), %xmm11 +; movdqu 192(%rsp), %xmm12 +; movdqu 208(%rsp), %xmm13 +; movdqu 224(%rsp), %xmm14 +; addq %rsp, $240, %rsp +; movq %rbp, %rsp +; popq %rbp +; ret diff --git a/cranelift/filetests/filetests/isa/x64/floating-point.clif b/cranelift/filetests/filetests/isa/x64/floating-point.clif index eaf9760328..153904de83 100644 --- a/cranelift/filetests/filetests/isa/x64/floating-point.clif +++ b/cranelift/filetests/filetests/isa/x64/floating-point.clif @@ -7,21 +7,17 @@ block0(v0: f64): return v1 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 9) -; Inst 0: pushq %rbp -; Inst 1: movq %rsp, %rbp -; Inst 2: movabsq $9223372036854775807, %rsi -; Inst 3: movq %rsi, %xmm1 -; Inst 4: andpd %xmm0, %xmm1 -; Inst 5: movaps %xmm1, %xmm0 -; Inst 6: movq %rbp, %rsp -; Inst 7: popq %rbp -; Inst 8: ret -; }} +; pushq %rbp +; movq %rsp, %rbp +; block0: +; movdqa %xmm0, %xmm5 +; movabsq $9223372036854775807, %rdx +; movq %rdx, %xmm0 +; movdqa %xmm5, %xmm7 +; andpd %xmm0, %xmm0, %xmm7 +; movq %rbp, %rsp +; popq %rbp +; ret function %f(i64) -> f64 { block0(v0: i64): @@ -30,20 +26,14 @@ block0(v0: i64): return v2 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 10) -; Inst 0: pushq %rbp -; Inst 1: movq %rsp, %rbp -; Inst 2: movsd 0(%rdi), %xmm0 -; Inst 3: movabsq $9223372036854775807, %rsi -; Inst 4: movq %rsi, %xmm1 -; Inst 5: andpd %xmm0, %xmm1 -; Inst 6: movaps %xmm1, %xmm0 -; Inst 7: movq %rbp, %rsp -; Inst 8: popq %rbp -; Inst 9: ret -; }} +; pushq %rbp +; movq %rsp, %rbp +; block0: +; movsd 0(%rdi), %xmm5 +; movabsq $9223372036854775807, %r8 +; movq %r8, %xmm0 +; andpd %xmm0, %xmm0, %xmm5 +; movq %rbp, %rsp +; popq %rbp +; ret diff --git a/cranelift/filetests/filetests/isa/x64/heap.clif b/cranelift/filetests/filetests/isa/x64/heap.clif index c7f3664750..65fd524663 100644 --- a/cranelift/filetests/filetests/isa/x64/heap.clif +++ b/cranelift/filetests/filetests/isa/x64/heap.clif @@ -13,36 +13,24 @@ block0(v0: i32, v1: i64): return v2 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (successor: Block 1) -; (successor: Block 2) -; (instruction range: 0 .. 9) -; Inst 0: pushq %rbp -; Inst 1: movq %rsp, %rbp -; Inst 2: movl %edi, %ecx -; Inst 3: movq 8(%rsi), %rdi -; Inst 4: movq %rcx, %rax -; Inst 5: addq $32768, %rax -; Inst 6: jnb ; ud2 heap_oob ; -; Inst 7: cmpq %rdi, %rax -; Inst 8: jbe label1; j label2 -; Block 1: -; (original IR block: block2) -; (instruction range: 9 .. 17) -; Inst 9: addq 0(%rsi), %rcx -; Inst 10: xorq %rsi, %rsi -; Inst 11: cmpq %rdi, %rax -; Inst 12: cmovnbeq %rsi, %rcx -; Inst 13: movq %rcx, %rax -; Inst 14: movq %rbp, %rsp -; Inst 15: popq %rbp -; Inst 16: ret -; Block 2: -; (original IR block: block1) -; (instruction range: 17 .. 
18) -; Inst 17: ud2 heap_oob -; }} +; pushq %rbp +; movq %rsp, %rbp +; block0: +; movl %edi, %eax +; movq 8(%rsi), %rcx +; movq %rax, %rdx +; addq %rdx, $32768, %rdx +; jnb ; ud2 heap_oob ; +; cmpq %rcx, %rdx +; jbe label1; j label2 +; block1: +; addq %rax, 0(%rsi), %rax +; xorq %r8, %r8, %r8 +; cmpq %rcx, %rdx +; cmovnbeq %r8, %rax, %rax +; movq %rbp, %rsp +; popq %rbp +; ret +; block2: +; ud2 heap_oob diff --git a/cranelift/filetests/filetests/isa/x64/i128.clif b/cranelift/filetests/filetests/isa/x64/i128.clif index 01bb461730..51cc8129b6 100644 --- a/cranelift/filetests/filetests/isa/x64/i128.clif +++ b/cranelift/filetests/filetests/isa/x64/i128.clif @@ -8,21 +8,16 @@ block0(v0: i128, v1: i128): return v2 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 9) -; Inst 0: pushq %rbp -; Inst 1: movq %rsp, %rbp -; Inst 2: addq %rdx, %rdi -; Inst 3: adcq %rcx, %rsi -; Inst 4: movq %rdi, %rax -; Inst 5: movq %rsi, %rdx -; Inst 6: movq %rbp, %rsp -; Inst 7: popq %rbp -; Inst 8: ret -; }} +; pushq %rbp +; movq %rsp, %rbp +; block0: +; addq %rdi, %rdx, %rdi +; adcq %rsi, %rcx, %rsi +; movq %rdi, %rax +; movq %rsi, %rdx +; movq %rbp, %rsp +; popq %rbp +; ret function %f1(i128, i128) -> i128 { block0(v0: i128, v1: i128): @@ -30,21 +25,16 @@ block0(v0: i128, v1: i128): return v2 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 9) -; Inst 0: pushq %rbp -; Inst 1: movq %rsp, %rbp -; Inst 2: subq %rdx, %rdi -; Inst 3: sbbq %rcx, %rsi -; Inst 4: movq %rdi, %rax -; Inst 5: movq %rsi, %rdx -; Inst 6: movq %rbp, %rsp -; Inst 7: popq %rbp -; Inst 8: ret -; }} +; pushq %rbp +; movq %rsp, %rbp +; block0: +; subq %rdi, %rdx, %rdi +; sbbq %rsi, %rcx, %rsi +; movq %rdi, %rax +; movq %rsi, %rdx +; movq %rbp, %rsp +; popq %rbp +; ret function %f2(i128, i128) -> i128 { block0(v0: i128, v1: i128): @@ -52,21 +42,16 @@ block0(v0: i128, v1: i128): return v2 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 9) -; Inst 0: pushq %rbp -; Inst 1: movq %rsp, %rbp -; Inst 2: andq %rdx, %rdi -; Inst 3: andq %rcx, %rsi -; Inst 4: movq %rdi, %rax -; Inst 5: movq %rsi, %rdx -; Inst 6: movq %rbp, %rsp -; Inst 7: popq %rbp -; Inst 8: ret -; }} +; pushq %rbp +; movq %rsp, %rbp +; block0: +; andq %rdi, %rdx, %rdi +; andq %rsi, %rcx, %rsi +; movq %rdi, %rax +; movq %rsi, %rdx +; movq %rbp, %rsp +; popq %rbp +; ret function %f3(i128, i128) -> i128 { block0(v0: i128, v1: i128): @@ -74,21 +59,16 @@ block0(v0: i128, v1: i128): return v2 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 9) -; Inst 0: pushq %rbp -; Inst 1: movq %rsp, %rbp -; Inst 2: orq %rdx, %rdi -; Inst 3: orq %rcx, %rsi -; Inst 4: movq %rdi, %rax -; Inst 5: movq %rsi, %rdx -; Inst 6: movq %rbp, %rsp -; Inst 7: popq %rbp -; Inst 8: ret -; }} +; pushq %rbp +; movq %rsp, %rbp +; block0: +; orq %rdi, %rdx, %rdi +; orq %rsi, %rcx, %rsi +; movq %rdi, %rax +; movq %rsi, %rdx +; movq %rbp, %rsp +; popq %rbp +; ret function %f4(i128, i128) -> i128 { block0(v0: i128, v1: i128): @@ -96,21 +76,16 @@ block0(v0: i128, v1: i128): return v2 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 
9) -; Inst 0: pushq %rbp -; Inst 1: movq %rsp, %rbp -; Inst 2: xorq %rdx, %rdi -; Inst 3: xorq %rcx, %rsi -; Inst 4: movq %rdi, %rax -; Inst 5: movq %rsi, %rdx -; Inst 6: movq %rbp, %rsp -; Inst 7: popq %rbp -; Inst 8: ret -; }} +; pushq %rbp +; movq %rsp, %rbp +; block0: +; xorq %rdi, %rdx, %rdi +; xorq %rsi, %rcx, %rsi +; movq %rdi, %rax +; movq %rsi, %rdx +; movq %rbp, %rsp +; popq %rbp +; ret function %f5(i128) -> i128 { block0(v0: i128): @@ -118,24 +93,16 @@ block0(v0: i128): return v1 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 12) -; Inst 0: pushq %rbp -; Inst 1: movq %rsp, %rbp -; Inst 2: movq %rsi, %rax -; Inst 3: movq %rdi, %rsi -; Inst 4: notq %rsi -; Inst 5: movq %rax, %rdi -; Inst 6: notq %rdi -; Inst 7: movq %rsi, %rax -; Inst 8: movq %rdi, %rdx -; Inst 9: movq %rbp, %rsp -; Inst 10: popq %rbp -; Inst 11: ret -; }} +; pushq %rbp +; movq %rsp, %rbp +; block0: +; notq %rdi, %rdi +; notq %rsi, %rsi +; movq %rdi, %rax +; movq %rsi, %rdx +; movq %rbp, %rsp +; popq %rbp +; ret function %f6(i128, i128) -> i128 { block0(v0: i128, v1: i128): @@ -143,26 +110,24 @@ block0(v0: i128, v1: i128): return v2 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 14) -; Inst 0: pushq %rbp -; Inst 1: movq %rsp, %rbp -; Inst 2: movq %rsi, %rax -; Inst 3: movq %rdi, %rsi -; Inst 4: imulq %rcx, %rsi -; Inst 5: imulq %rdx, %rax -; Inst 6: addq %rax, %rsi -; Inst 7: movq %rdi, %rax -; Inst 8: mul %rdx -; Inst 9: addq %rdx, %rsi -; Inst 10: movq %rsi, %rdx -; Inst 11: movq %rbp, %rsp -; Inst 12: popq %rbp -; Inst 13: ret -; }} +; pushq %rbp +; movq %rsp, %rbp +; block0: +; movq %rdi, %r11 +; imulq %r11, %rcx, %r11 +; movq %rdi, %rax +; imulq %rsi, %rdx, %rsi +; movq %r11, %rdi +; addq %rdi, %rsi, %rdi +; movq %rdi, %r11 +; mul %rax, %rdx, %rax, %rdx +; movq %r11, %r8 +; addq %r8, %rdx, %r8 +; movq %r8, %r11 +; movq %r11, %rdx +; movq %rbp, %rsp +; popq %rbp +; ret function %f7(i64, i64) -> i128 { block0(v0: i64, v1: i64): @@ -170,19 +135,14 @@ block0(v0: i64, v1: i64): return v2 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 7) -; Inst 0: pushq %rbp -; Inst 1: movq %rsp, %rbp -; Inst 2: movq %rdi, %rax -; Inst 3: movq %rsi, %rdx -; Inst 4: movq %rbp, %rsp -; Inst 5: popq %rbp -; Inst 6: ret -; }} +; pushq %rbp +; movq %rsp, %rbp +; block0: +; movq %rdi, %rax +; movq %rsi, %rdx +; movq %rbp, %rsp +; popq %rbp +; ret function %f8(i128) -> i64, i64 { block0(v0: i128): @@ -190,19 +150,14 @@ block0(v0: i128): return v1, v2 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 7) -; Inst 0: pushq %rbp -; Inst 1: movq %rsp, %rbp -; Inst 2: movq %rdi, %rax -; Inst 3: movq %rsi, %rdx -; Inst 4: movq %rbp, %rsp -; Inst 5: popq %rbp -; Inst 6: ret -; }} +; pushq %rbp +; movq %rsp, %rbp +; block0: +; movq %rdi, %rax +; movq %rsi, %rdx +; movq %rbp, %rsp +; popq %rbp +; ret function %f9(i128, i128) -> b1 { block0(v0: i128, v1: i128): @@ -228,126 +183,123 @@ block0(v0: i128, v1: i128): return v20 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 
114) -; Inst 0: pushq %rbp -; Inst 1: movq %rsp, %rbp -; Inst 2: subq $64, %rsp -; Inst 3: movq %r12, 16(%rsp) -; Inst 4: movq %r13, 24(%rsp) -; Inst 5: movq %r14, 32(%rsp) -; Inst 6: movq %rbx, 40(%rsp) -; Inst 7: movq %r15, 48(%rsp) -; Inst 8: cmpq %rdx, %rdi -; Inst 9: setz %al -; Inst 10: cmpq %rcx, %rsi -; Inst 11: setz %r8b -; Inst 12: andq %r8, %rax -; Inst 13: testq $1, %rax -; Inst 14: setnz %al -; Inst 15: movq %rax, rsp(0 + virtual offset) -; Inst 16: cmpq %rdx, %rdi -; Inst 17: setnz %al -; Inst 18: cmpq %rcx, %rsi -; Inst 19: setnz %r8b -; Inst 20: orq %r8, %rax -; Inst 21: testq $1, %rax -; Inst 22: setnz %r8b -; Inst 23: cmpq %rcx, %rsi -; Inst 24: setl %r9b -; Inst 25: setz %al -; Inst 26: cmpq %rdx, %rdi -; Inst 27: setb %r10b -; Inst 28: andq %rax, %r10 -; Inst 29: orq %r9, %r10 -; Inst 30: andq $1, %r10 -; Inst 31: setnz %r9b -; Inst 32: cmpq %rcx, %rsi -; Inst 33: setl %r10b -; Inst 34: setz %al -; Inst 35: cmpq %rdx, %rdi -; Inst 36: setbe %r11b -; Inst 37: andq %rax, %r11 -; Inst 38: orq %r10, %r11 -; Inst 39: andq $1, %r11 -; Inst 40: setnz %r10b -; Inst 41: cmpq %rcx, %rsi -; Inst 42: setnle %r11b -; Inst 43: setz %al -; Inst 44: cmpq %rdx, %rdi -; Inst 45: setnbe %r12b -; Inst 46: andq %rax, %r12 -; Inst 47: orq %r11, %r12 -; Inst 48: andq $1, %r12 -; Inst 49: setnz %r11b -; Inst 50: cmpq %rcx, %rsi -; Inst 51: setnle %r12b -; Inst 52: setz %al -; Inst 53: cmpq %rdx, %rdi -; Inst 54: setnb %r13b -; Inst 55: andq %rax, %r13 -; Inst 56: orq %r12, %r13 -; Inst 57: andq $1, %r13 -; Inst 58: setnz %r12b -; Inst 59: cmpq %rcx, %rsi -; Inst 60: setb %r13b -; Inst 61: setz %al -; Inst 62: cmpq %rdx, %rdi -; Inst 63: setb %r14b -; Inst 64: andq %rax, %r14 -; Inst 65: orq %r13, %r14 -; Inst 66: andq $1, %r14 -; Inst 67: setnz %r13b -; Inst 68: cmpq %rcx, %rsi -; Inst 69: setb %r14b -; Inst 70: setz %al -; Inst 71: cmpq %rdx, %rdi -; Inst 72: setbe %bl -; Inst 73: andq %rax, %rbx -; Inst 74: orq %r14, %rbx -; Inst 75: andq $1, %rbx -; Inst 76: setnz %r14b -; Inst 77: cmpq %rcx, %rsi -; Inst 78: setnbe %bl -; Inst 79: setz %r15b -; Inst 80: cmpq %rdx, %rdi -; Inst 81: setnbe %al -; Inst 82: andq %r15, %rax -; Inst 83: orq %rbx, %rax -; Inst 84: andq $1, %rax -; Inst 85: setnz %bl -; Inst 86: cmpq %rcx, %rsi -; Inst 87: setnbe %sil -; Inst 88: setz %cl -; Inst 89: cmpq %rdx, %rdi -; Inst 90: setnb %dil -; Inst 91: andq %rcx, %rdi -; Inst 92: orq %rsi, %rdi -; Inst 93: andq $1, %rdi -; Inst 94: setnz %sil -; Inst 95: movq rsp(0 + virtual offset), %rax -; Inst 96: andl %r8d, %eax -; Inst 97: andl %r10d, %r9d -; Inst 98: andl %r12d, %r11d -; Inst 99: andl %r14d, %r13d -; Inst 100: andl %esi, %ebx -; Inst 101: andl %r9d, %eax -; Inst 102: andl %r13d, %r11d -; Inst 103: andl %r11d, %eax -; Inst 104: andl %ebx, %eax -; Inst 105: movq 16(%rsp), %r12 -; Inst 106: movq 24(%rsp), %r13 -; Inst 107: movq 32(%rsp), %r14 -; Inst 108: movq 40(%rsp), %rbx -; Inst 109: movq 48(%rsp), %r15 -; Inst 110: addq $64, %rsp -; Inst 111: movq %rbp, %rsp -; Inst 112: popq %rbp -; Inst 113: ret -; }} +; pushq %rbp +; movq %rsp, %rbp +; subq %rsp, $64, %rsp +; movq %rbx, 16(%rsp) +; movq %r12, 24(%rsp) +; movq %r13, 32(%rsp) +; movq %r14, 40(%rsp) +; movq %r15, 48(%rsp) +; block0: +; cmpq %rdx, %rdi +; setz %al +; cmpq %rcx, %rsi +; setz %r8b +; andq %rax, %r8, %rax +; testq $1, %rax +; setnz %al +; cmpq %rdx, %rdi +; setnz %r8b +; cmpq %rcx, %rsi +; setnz %r9b +; orq %r8, %r9, %r8 +; testq $1, %r8 +; setnz %r8b +; movq %r8, rsp(8 + virtual offset) +; cmpq %rcx, %rsi +; setl %r9b +; setz %r15b +; cmpq 
%rdx, %rdi +; setb %r11b +; andq %r11, %r15, %r11 +; orq %r11, %r9, %r11 +; andq %r11, $1, %r11 +; setnz %r9b +; cmpq %rcx, %rsi +; setl %r11b +; setz %bl +; cmpq %rdx, %rdi +; setbe %r14b +; andq %r14, %rbx, %r14 +; orq %r14, %r11, %r14 +; andq %r14, $1, %r14 +; setnz %r8b +; movq %r8, rsp(0 + virtual offset) +; cmpq %rcx, %rsi +; setnle %r15b +; setz %r12b +; cmpq %rdx, %rdi +; setnbe %bl +; andq %rbx, %r12, %rbx +; orq %rbx, %r15, %rbx +; andq %rbx, $1, %rbx +; setnz %r15b +; cmpq %rcx, %rsi +; setnle %bl +; setz %r13b +; cmpq %rdx, %rdi +; setnb %r12b +; andq %r12, %r13, %r12 +; orq %r12, %rbx, %r12 +; andq %r12, $1, %r12 +; setnz %bl +; cmpq %rcx, %rsi +; setb %r12b +; setz %r14b +; cmpq %rdx, %rdi +; setb %r13b +; andq %r13, %r14, %r13 +; orq %r13, %r12, %r13 +; andq %r13, $1, %r13 +; setnz %r12b +; cmpq %rcx, %rsi +; setb %r13b +; setz %r8b +; cmpq %rdx, %rdi +; setbe %r14b +; andq %r14, %r8, %r14 +; orq %r14, %r13, %r14 +; andq %r14, $1, %r14 +; setnz %r13b +; cmpq %rcx, %rsi +; setnbe %r14b +; setz %r11b +; cmpq %rdx, %rdi +; setnbe %r8b +; andq %r8, %r11, %r8 +; orq %r8, %r14, %r8 +; andq %r8, $1, %r8 +; setnz %r14b +; cmpq %rcx, %rsi +; setnbe %sil +; setz %cl +; cmpq %rdx, %rdi +; setnb %r8b +; andq %r8, %rcx, %r8 +; orq %r8, %rsi, %r8 +; andq %r8, $1, %r8 +; setnz %dl +; movq rsp(8 + virtual offset), %r11 +; andl %eax, %r11d, %eax +; movq rsp(0 + virtual offset), %rdi +; andl %r9d, %edi, %r9d +; andl %r15d, %ebx, %r15d +; andl %r12d, %r13d, %r12d +; andl %r14d, %edx, %r14d +; andl %eax, %r9d, %eax +; andl %r15d, %r12d, %r15d +; andl %eax, %r15d, %eax +; andl %eax, %r14d, %eax +; movq 16(%rsp), %rbx +; movq 24(%rsp), %r12 +; movq 32(%rsp), %r13 +; movq 40(%rsp), %r14 +; movq 48(%rsp), %r15 +; addq %rsp, $64, %rsp +; movq %rbp, %rsp +; popq %rbp +; ret function %f10(i128) -> i32 { block0(v0: i128): @@ -363,36 +315,25 @@ block2: return v2 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (successor: Block 1) -; (successor: Block 2) -; (instruction range: 0 .. 8) -; Inst 0: pushq %rbp -; Inst 1: movq %rsp, %rbp -; Inst 2: cmpq $0, %rdi -; Inst 3: setz %dil -; Inst 4: cmpq $0, %rsi -; Inst 5: setz %sil -; Inst 6: andb %dil, %sil -; Inst 7: jnz label1; j label2 -; Block 1: -; (original IR block: block1) -; (instruction range: 8 .. 12) -; Inst 8: movl $1, %eax -; Inst 9: movq %rbp, %rsp -; Inst 10: popq %rbp -; Inst 11: ret -; Block 2: -; (original IR block: block2) -; (instruction range: 12 .. 16) -; Inst 12: movl $2, %eax -; Inst 13: movq %rbp, %rsp -; Inst 14: popq %rbp -; Inst 15: ret -; }} +; pushq %rbp +; movq %rsp, %rbp +; block0: +; cmpq $0, %rdi +; setz %dil +; cmpq $0, %rsi +; setz %cl +; andb %cl, %dil, %cl +; jnz label1; j label2 +; block1: +; movl $1, %eax +; movq %rbp, %rsp +; popq %rbp +; ret +; block2: +; movl $2, %eax +; movq %rbp, %rsp +; popq %rbp +; ret function %f11(i128) -> i32 { block0(v0: i128): @@ -408,36 +349,25 @@ block2: return v2 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (successor: Block 1) -; (successor: Block 2) -; (instruction range: 0 .. 8) -; Inst 0: pushq %rbp -; Inst 1: movq %rsp, %rbp -; Inst 2: cmpq $0, %rdi -; Inst 3: setnz %dil -; Inst 4: cmpq $0, %rsi -; Inst 5: setnz %sil -; Inst 6: orb %dil, %sil -; Inst 7: jnz label1; j label2 -; Block 1: -; (original IR block: block1) -; (instruction range: 8 .. 
12) -; Inst 8: movl $1, %eax -; Inst 9: movq %rbp, %rsp -; Inst 10: popq %rbp -; Inst 11: ret -; Block 2: -; (original IR block: block2) -; (instruction range: 12 .. 16) -; Inst 12: movl $2, %eax -; Inst 13: movq %rbp, %rsp -; Inst 14: popq %rbp -; Inst 15: ret -; }} +; pushq %rbp +; movq %rsp, %rbp +; block0: +; cmpq $0, %rdi +; setnz %dil +; cmpq $0, %rsi +; setnz %cl +; orb %cl, %dil, %cl +; jnz label1; j label2 +; block1: +; movl $1, %eax +; movq %rbp, %rsp +; popq %rbp +; ret +; block2: +; movl $2, %eax +; movq %rbp, %rsp +; popq %rbp +; ret function %f12(i64) -> i128 { block0(v0: i64): @@ -445,21 +375,14 @@ block0(v0: i64): return v1 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 9) -; Inst 0: pushq %rbp -; Inst 1: movq %rsp, %rbp -; Inst 2: movq %rdi, %rsi -; Inst 3: xorq %rdi, %rdi -; Inst 4: movq %rsi, %rax -; Inst 5: movq %rdi, %rdx -; Inst 6: movq %rbp, %rsp -; Inst 7: popq %rbp -; Inst 8: ret -; }} +; pushq %rbp +; movq %rsp, %rbp +; block0: +; xorq %rdx, %rdx, %rdx +; movq %rdi, %rax +; movq %rbp, %rsp +; popq %rbp +; ret function %f13(i64) -> i128 { block0(v0: i64): @@ -467,22 +390,15 @@ block0(v0: i64): return v1 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 10) -; Inst 0: pushq %rbp -; Inst 1: movq %rsp, %rbp -; Inst 2: movq %rdi, %rsi -; Inst 3: movq %rsi, %rdi -; Inst 4: sarq $63, %rdi -; Inst 5: movq %rsi, %rax -; Inst 6: movq %rdi, %rdx -; Inst 7: movq %rbp, %rsp -; Inst 8: popq %rbp -; Inst 9: ret -; }} +; pushq %rbp +; movq %rsp, %rbp +; block0: +; movq %rdi, %rdx +; sarq $63, %rdx, %rdx +; movq %rdi, %rax +; movq %rbp, %rsp +; popq %rbp +; ret function %f14(i8) -> i128 { block0(v0: i8): @@ -490,22 +406,15 @@ block0(v0: i8): return v1 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 10) -; Inst 0: pushq %rbp -; Inst 1: movq %rsp, %rbp -; Inst 2: movsbq %dil, %rsi -; Inst 3: movq %rsi, %rdi -; Inst 4: sarq $63, %rdi -; Inst 5: movq %rsi, %rax -; Inst 6: movq %rdi, %rdx -; Inst 7: movq %rbp, %rsp -; Inst 8: popq %rbp -; Inst 9: ret -; }} +; pushq %rbp +; movq %rsp, %rbp +; block0: +; movsbq %dil, %rax +; movq %rax, %rdx +; sarq $63, %rdx, %rdx +; movq %rbp, %rsp +; popq %rbp +; ret function %f15(i8) -> i128 { block0(v0: i8): @@ -513,21 +422,14 @@ block0(v0: i8): return v1 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 9) -; Inst 0: pushq %rbp -; Inst 1: movq %rsp, %rbp -; Inst 2: movzbq %dil, %rsi -; Inst 3: xorq %rdi, %rdi -; Inst 4: movq %rsi, %rax -; Inst 5: movq %rdi, %rdx -; Inst 6: movq %rbp, %rsp -; Inst 7: popq %rbp -; Inst 8: ret -; }} +; pushq %rbp +; movq %rsp, %rbp +; block0: +; movzbq %dil, %rax +; xorq %rdx, %rdx, %rdx +; movq %rbp, %rsp +; popq %rbp +; ret function %f16(i128) -> i64 { block0(v0: i128): @@ -535,18 +437,13 @@ block0(v0: i128): return v1 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 
6) -; Inst 0: pushq %rbp -; Inst 1: movq %rsp, %rbp -; Inst 2: movq %rdi, %rax -; Inst 3: movq %rbp, %rsp -; Inst 4: popq %rbp -; Inst 5: ret -; }} +; pushq %rbp +; movq %rsp, %rbp +; block0: +; movq %rdi, %rax +; movq %rbp, %rsp +; popq %rbp +; ret function %f17(i128) -> i8 { block0(v0: i128): @@ -554,18 +451,13 @@ block0(v0: i128): return v1 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 6) -; Inst 0: pushq %rbp -; Inst 1: movq %rsp, %rbp -; Inst 2: movq %rdi, %rax -; Inst 3: movq %rbp, %rsp -; Inst 4: popq %rbp -; Inst 5: ret -; }} +; pushq %rbp +; movq %rsp, %rbp +; block0: +; movq %rdi, %rax +; movq %rbp, %rsp +; popq %rbp +; ret function %f18(b1) -> i128 { block0(v0: b1): @@ -573,22 +465,15 @@ block0(v0: b1): return v1 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 10) -; Inst 0: pushq %rbp -; Inst 1: movq %rsp, %rbp -; Inst 2: movq %rdi, %rsi -; Inst 3: andq $1, %rsi -; Inst 4: xorq %rdi, %rdi -; Inst 5: movq %rsi, %rax -; Inst 6: movq %rdi, %rdx -; Inst 7: movq %rbp, %rsp -; Inst 8: popq %rbp -; Inst 9: ret -; }} +; pushq %rbp +; movq %rsp, %rbp +; block0: +; andq %rdi, $1, %rdi +; xorq %rdx, %rdx, %rdx +; movq %rdi, %rax +; movq %rbp, %rsp +; popq %rbp +; ret function %f19(i128) -> i128 { block0(v0: i128): @@ -596,60 +481,52 @@ block0(v0: i128): return v1 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 48) -; Inst 0: pushq %rbp -; Inst 1: movq %rsp, %rbp -; Inst 2: movq %rdi, %rax -; Inst 3: movq %rax, %rcx -; Inst 4: shrq $1, %rcx -; Inst 5: movabsq $8608480567731124087, %rdi -; Inst 6: andq %rdi, %rcx -; Inst 7: subq %rcx, %rax -; Inst 8: shrq $1, %rcx -; Inst 9: andq %rdi, %rcx -; Inst 10: subq %rcx, %rax -; Inst 11: shrq $1, %rcx -; Inst 12: andq %rdi, %rcx -; Inst 13: subq %rcx, %rax -; Inst 14: movq %rax, %rdi -; Inst 15: shrq $4, %rdi -; Inst 16: addq %rax, %rdi -; Inst 17: movabsq $1085102592571150095, %rax -; Inst 18: andq %rax, %rdi -; Inst 19: movabsq $72340172838076673, %rax -; Inst 20: imulq %rax, %rdi -; Inst 21: shrq $56, %rdi -; Inst 22: movq %rsi, %rcx -; Inst 23: shrq $1, %rcx -; Inst 24: movabsq $8608480567731124087, %rax -; Inst 25: andq %rax, %rcx -; Inst 26: subq %rcx, %rsi -; Inst 27: shrq $1, %rcx -; Inst 28: andq %rax, %rcx -; Inst 29: subq %rcx, %rsi -; Inst 30: shrq $1, %rcx -; Inst 31: andq %rax, %rcx -; Inst 32: subq %rcx, %rsi -; Inst 33: movq %rsi, %rax -; Inst 34: shrq $4, %rax -; Inst 35: addq %rsi, %rax -; Inst 36: movabsq $1085102592571150095, %rsi -; Inst 37: andq %rsi, %rax -; Inst 38: movabsq $72340172838076673, %rsi -; Inst 39: imulq %rsi, %rax -; Inst 40: shrq $56, %rax -; Inst 41: addq %rax, %rdi -; Inst 42: xorq %rsi, %rsi -; Inst 43: movq %rdi, %rax -; Inst 44: movq %rsi, %rdx -; Inst 45: movq %rbp, %rsp -; Inst 46: popq %rbp -; Inst 47: ret -; }} +; pushq %rbp +; movq %rsp, %rbp +; block0: +; movq %rdi, %r9 +; shrq $1, %r9, %r9 +; movabsq $8608480567731124087, %rax +; andq %r9, %rax, %r9 +; subq %rdi, %r9, %rdi +; shrq $1, %r9, %r9 +; andq %r9, %rax, %r9 +; subq %rdi, %r9, %rdi +; shrq $1, %r9, %r9 +; andq %r9, %rax, %r9 +; subq %rdi, %r9, %rdi +; movq %rdi, %rax +; shrq $4, %rax, %rax +; addq %rax, %rdi, %rax +; movabsq $1085102592571150095, %r9 +; andq %rax, %r9, %rax +; movabsq $72340172838076673, %rcx +; imulq %rax, %rcx, %rax +; shrq $56, %rax, %rax +; movq %rsi, %rdi +; shrq $1, %rdi, %rdi +; movabsq $8608480567731124087, %rcx +; 
andq %rdi, %rcx, %rdi +; subq %rsi, %rdi, %rsi +; shrq $1, %rdi, %rdi +; andq %rdi, %rcx, %rdi +; subq %rsi, %rdi, %rsi +; shrq $1, %rdi, %rdi +; andq %rdi, %rcx, %rdi +; subq %rsi, %rdi, %rsi +; movq %rsi, %rcx +; shrq $4, %rcx, %rcx +; addq %rcx, %rsi, %rcx +; movabsq $1085102592571150095, %rsi +; andq %rcx, %rsi, %rcx +; movabsq $72340172838076673, %rdx +; imulq %rcx, %rdx, %rcx +; shrq $56, %rcx, %rcx +; addq %rax, %rcx, %rax +; xorq %rdx, %rdx, %rdx +; movq %rbp, %rsp +; popq %rbp +; ret function %f20(i128) -> i128 { block0(v0: i128): @@ -657,102 +534,94 @@ block0(v0: i128): return v1 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 90) -; Inst 0: pushq %rbp -; Inst 1: movq %rsp, %rbp -; Inst 2: movq %rsi, %rcx -; Inst 3: movabsq $6148914691236517205, %rsi -; Inst 4: movq %rcx, %rax -; Inst 5: andq %rsi, %rax -; Inst 6: shrq $1, %rcx -; Inst 7: andq %rsi, %rcx -; Inst 8: shlq $1, %rax -; Inst 9: orq %rcx, %rax -; Inst 10: movabsq $3689348814741910323, %rsi -; Inst 11: movq %rax, %rcx -; Inst 12: andq %rsi, %rcx -; Inst 13: shrq $2, %rax -; Inst 14: andq %rsi, %rax -; Inst 15: shlq $2, %rcx -; Inst 16: orq %rax, %rcx -; Inst 17: movabsq $1085102592571150095, %rsi -; Inst 18: movq %rcx, %rax -; Inst 19: andq %rsi, %rax -; Inst 20: shrq $4, %rcx -; Inst 21: andq %rsi, %rcx -; Inst 22: shlq $4, %rax -; Inst 23: orq %rcx, %rax -; Inst 24: movabsq $71777214294589695, %rsi -; Inst 25: movq %rax, %rcx -; Inst 26: andq %rsi, %rcx -; Inst 27: shrq $8, %rax -; Inst 28: andq %rsi, %rax -; Inst 29: shlq $8, %rcx -; Inst 30: orq %rax, %rcx -; Inst 31: movabsq $281470681808895, %rsi -; Inst 32: movq %rcx, %rax -; Inst 33: andq %rsi, %rax -; Inst 34: shrq $16, %rcx -; Inst 35: andq %rsi, %rcx -; Inst 36: shlq $16, %rax -; Inst 37: orq %rcx, %rax -; Inst 38: movabsq $4294967295, %rcx -; Inst 39: movq %rax, %rsi -; Inst 40: andq %rcx, %rsi -; Inst 41: shrq $32, %rax -; Inst 42: shlq $32, %rsi -; Inst 43: orq %rax, %rsi -; Inst 44: movabsq $6148914691236517205, %rax -; Inst 45: movq %rdi, %rcx -; Inst 46: andq %rax, %rcx -; Inst 47: shrq $1, %rdi -; Inst 48: andq %rax, %rdi -; Inst 49: shlq $1, %rcx -; Inst 50: orq %rdi, %rcx -; Inst 51: movabsq $3689348814741910323, %rdi -; Inst 52: movq %rcx, %rax -; Inst 53: andq %rdi, %rax -; Inst 54: shrq $2, %rcx -; Inst 55: andq %rdi, %rcx -; Inst 56: shlq $2, %rax -; Inst 57: orq %rcx, %rax -; Inst 58: movabsq $1085102592571150095, %rdi -; Inst 59: movq %rax, %rcx -; Inst 60: andq %rdi, %rcx -; Inst 61: shrq $4, %rax -; Inst 62: andq %rdi, %rax -; Inst 63: shlq $4, %rcx -; Inst 64: orq %rax, %rcx -; Inst 65: movabsq $71777214294589695, %rdi -; Inst 66: movq %rcx, %rax -; Inst 67: andq %rdi, %rax -; Inst 68: shrq $8, %rcx -; Inst 69: andq %rdi, %rcx -; Inst 70: shlq $8, %rax -; Inst 71: orq %rcx, %rax -; Inst 72: movabsq $281470681808895, %rdi -; Inst 73: movq %rax, %rcx -; Inst 74: andq %rdi, %rcx -; Inst 75: shrq $16, %rax -; Inst 76: andq %rdi, %rax -; Inst 77: shlq $16, %rcx -; Inst 78: orq %rax, %rcx -; Inst 79: movabsq $4294967295, %rax -; Inst 80: movq %rcx, %rdi -; Inst 81: andq %rax, %rdi -; Inst 82: shrq $32, %rcx -; Inst 83: shlq $32, %rdi -; Inst 84: orq %rcx, %rdi -; Inst 85: movq %rsi, %rax -; Inst 86: movq %rdi, %rdx -; Inst 87: movq %rbp, %rsp -; Inst 88: popq %rbp -; Inst 89: ret -; }} +; pushq %rbp +; movq %rsp, %rbp +; block0: +; movabsq $6148914691236517205, %r9 +; movq %rsi, %rax +; andq %rax, %r9, %rax +; shrq $1, %rsi, %rsi +; andq %rsi, %r9, %rsi +; shlq $1, %rax, %rax +; 
orq %rax, %rsi, %rax +; movabsq $3689348814741910323, %rcx +; movq %rax, %rdx +; andq %rdx, %rcx, %rdx +; shrq $2, %rax, %rax +; andq %rax, %rcx, %rax +; shlq $2, %rdx, %rdx +; orq %rdx, %rax, %rdx +; movabsq $1085102592571150095, %r9 +; movq %rdx, %rsi +; andq %rsi, %r9, %rsi +; shrq $4, %rdx, %rdx +; andq %rdx, %r9, %rdx +; shlq $4, %rsi, %rsi +; orq %rsi, %rdx, %rsi +; movabsq $71777214294589695, %rax +; movq %rsi, %rdx +; andq %rdx, %rax, %rdx +; shrq $8, %rsi, %rsi +; andq %rsi, %rax, %rsi +; shlq $8, %rdx, %rdx +; orq %rdx, %rsi, %rdx +; movabsq $281470681808895, %r9 +; movq %rdx, %r11 +; andq %r11, %r9, %r11 +; shrq $16, %rdx, %rdx +; andq %rdx, %r9, %rdx +; shlq $16, %r11, %r11 +; orq %r11, %rdx, %r11 +; movabsq $4294967295, %rcx +; movq %r11, %rax +; andq %rax, %rcx, %rax +; shrq $32, %r11, %r11 +; shlq $32, %rax, %rax +; orq %rax, %r11, %rax +; movabsq $6148914691236517205, %rdx +; movq %rdi, %r9 +; andq %r9, %rdx, %r9 +; shrq $1, %rdi, %rdi +; andq %rdi, %rdx, %rdi +; shlq $1, %r9, %r9 +; orq %r9, %rdi, %r9 +; movabsq $3689348814741910323, %rsi +; movq %r9, %rcx +; andq %rcx, %rsi, %rcx +; shrq $2, %r9, %r9 +; andq %r9, %rsi, %r9 +; shlq $2, %rcx, %rcx +; orq %rcx, %r9, %rcx +; movabsq $1085102592571150095, %rdx +; movq %rcx, %r9 +; andq %r9, %rdx, %r9 +; shrq $4, %rcx, %rcx +; andq %rcx, %rdx, %rcx +; shlq $4, %r9, %r9 +; orq %r9, %rcx, %r9 +; movabsq $71777214294589695, %rsi +; movq %r9, %rcx +; andq %rcx, %rsi, %rcx +; shrq $8, %r9, %r9 +; andq %r9, %rsi, %r9 +; shlq $8, %rcx, %rcx +; orq %rcx, %r9, %rcx +; movabsq $281470681808895, %rdx +; movq %rcx, %r8 +; andq %r8, %rdx, %r8 +; shrq $16, %rcx, %rcx +; andq %rcx, %rdx, %rcx +; shlq $16, %r8, %r8 +; orq %r8, %rcx, %r8 +; movabsq $4294967295, %rsi +; movq %r8, %rdx +; andq %rdx, %rsi, %rdx +; shrq $32, %r8, %r8 +; shlq $32, %rdx, %rdx +; orq %rdx, %r8, %rdx +; movq %rbp, %rsp +; popq %rbp +; ret function %f21(i128, i64) { block0(v0: i128, v1: i64): @@ -760,19 +629,14 @@ block0(v0: i128, v1: i64): return } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 7) -; Inst 0: pushq %rbp -; Inst 1: movq %rsp, %rbp -; Inst 2: movq %rdi, 0(%rdx) -; Inst 3: movq %rsi, 8(%rdx) -; Inst 4: movq %rbp, %rsp -; Inst 5: popq %rbp -; Inst 6: ret -; }} +; pushq %rbp +; movq %rsp, %rbp +; block0: +; movq %rdi, 0(%rdx) +; movq %rsi, 8(%rdx) +; movq %rbp, %rsp +; popq %rbp +; ret function %f22(i64) -> i128 { block0(v0: i64): @@ -780,21 +644,14 @@ block0(v0: i64): return v1 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 9) -; Inst 0: pushq %rbp -; Inst 1: movq %rsp, %rbp -; Inst 2: movq 0(%rdi), %rsi -; Inst 3: movq 8(%rdi), %rdi -; Inst 4: movq %rsi, %rax -; Inst 5: movq %rdi, %rdx -; Inst 6: movq %rbp, %rsp -; Inst 7: popq %rbp -; Inst 8: ret -; }} +; pushq %rbp +; movq %rsp, %rbp +; block0: +; movq 0(%rdi), %rax +; movq 8(%rdi), %rdx +; movq %rbp, %rsp +; popq %rbp +; ret function %f23(i128, b1) -> i128 { block0(v0: i128, v1: b1): @@ -813,46 +670,31 @@ block2(v6: i128): return v8 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (successor: Block 1) -; (successor: Block 2) -; (instruction range: 0 .. 4) -; Inst 0: pushq %rbp -; Inst 1: movq %rsp, %rbp -; Inst 2: testb $1, %dl -; Inst 3: jnz label1; j label2 -; Block 1: -; (original IR block: block1) -; (instruction range: 4 .. 
15) -; Inst 4: xorq %rdi, %rdi -; Inst 5: xorq %rsi, %rsi -; Inst 6: movl $1, %ecx -; Inst 7: xorq %rax, %rax -; Inst 8: addq %rcx, %rdi -; Inst 9: adcq %rax, %rsi -; Inst 10: movq %rdi, %rax -; Inst 11: movq %rsi, %rdx -; Inst 12: movq %rbp, %rsp -; Inst 13: popq %rbp -; Inst 14: ret -; Block 2: -; (original IR block: block2) -; (instruction range: 15 .. 26) -; Inst 15: xorq %rdi, %rdi -; Inst 16: xorq %rsi, %rsi -; Inst 17: movl $2, %ecx -; Inst 18: xorq %rax, %rax -; Inst 19: addq %rcx, %rdi -; Inst 20: adcq %rax, %rsi -; Inst 21: movq %rdi, %rax -; Inst 22: movq %rsi, %rdx -; Inst 23: movq %rbp, %rsp -; Inst 24: popq %rbp -; Inst 25: ret -; }} +; pushq %rbp +; movq %rsp, %rbp +; block0: +; testb $1, %dl +; jnz label1; j label2 +; block1: +; xorq %rax, %rax, %rax +; xorq %rdx, %rdx, %rdx +; movl $1, %r8d +; xorq %r9, %r9, %r9 +; addq %rax, %r8, %rax +; adcq %rdx, %r9, %rdx +; movq %rbp, %rsp +; popq %rbp +; ret +; block2: +; xorq %rax, %rax, %rax +; xorq %rdx, %rdx, %rdx +; movl $2, %edi +; xorq %rcx, %rcx, %rcx +; addq %rax, %rdi, %rax +; adcq %rdx, %rcx, %rdx +; movq %rbp, %rsp +; popq %rbp +; ret function %f24(i128, i128, i64, i128, i128, i128) -> i128 { @@ -866,43 +708,38 @@ block0(v0: i128, v1: i128, v2: i64, v3: i128, v4: i128, v5: i128): return v11 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 31) -; Inst 0: pushq %rbp -; Inst 1: movq %rsp, %rbp -; Inst 2: subq $16, %rsp -; Inst 3: movq %r12, 0(%rsp) -; Inst 4: movq %r13, 8(%rsp) -; Inst 5: movq %r9, %r11 -; Inst 6: movq 16(%rbp), %r13 -; Inst 7: movq 24(%rbp), %r12 -; Inst 8: movq 32(%rbp), %r10 -; Inst 9: movq 40(%rbp), %r9 -; Inst 10: movq 48(%rbp), %rax -; Inst 11: addq %rdx, %rdi -; Inst 12: movq %rsi, %rdx -; Inst 13: adcq %rcx, %rdx -; Inst 14: xorq %rsi, %rsi -; Inst 15: addq %r8, %r11 -; Inst 16: adcq %rsi, %r13 -; Inst 17: addq %r9, %r12 -; Inst 18: adcq %rax, %r10 -; Inst 19: addq %r11, %rdi -; Inst 20: adcq %r13, %rdx -; Inst 21: addq %rdi, %r12 -; Inst 22: adcq %rdx, %r10 -; Inst 23: movq %r12, %rax -; Inst 24: movq %r10, %rdx -; Inst 25: movq 0(%rsp), %r12 -; Inst 26: movq 8(%rsp), %r13 -; Inst 27: addq $16, %rsp -; Inst 28: movq %rbp, %rsp -; Inst 29: popq %rbp -; Inst 30: ret -; }} +; pushq %rbp +; movq %rsp, %rbp +; subq %rsp, $64, %rsp +; movq %rbx, 32(%rsp) +; movq %r13, 40(%rsp) +; movq %r15, 48(%rsp) +; block0: +; movq %rdx, rsp(0 + virtual offset) +; movq 16(%rbp), %r11 +; movq 24(%rbp), %rax +; movq 32(%rbp), %rdx +; movq 40(%rbp), %r15 +; movq 48(%rbp), %rbx +; movq rsp(0 + virtual offset), %r13 +; addq %rdi, %r13, %rdi +; adcq %rsi, %rcx, %rsi +; xorq %rcx, %rcx, %rcx +; addq %r9, %r8, %r9 +; adcq %r11, %rcx, %r11 +; addq %rax, %r15, %rax +; adcq %rdx, %rbx, %rdx +; addq %rdi, %r9, %rdi +; adcq %rsi, %r11, %rsi +; addq %rax, %rdi, %rax +; adcq %rdx, %rsi, %rdx +; movq 32(%rsp), %rbx +; movq 40(%rsp), %r13 +; movq 48(%rsp), %r15 +; addq %rsp, $64, %rsp +; movq %rbp, %rsp +; popq %rbp +; ret function %f25(i128) -> i128, i128, i128, i64, i128, i128 { block0(v0: i128): @@ -910,49 +747,41 @@ block0(v0: i128): return v0, v0, v0, v1, v0, v0 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 
37) -; Inst 0: pushq %rbp -; Inst 1: movq %rsp, %rbp -; Inst 2: subq $32, %rsp -; Inst 3: movq %r12, 0(%rsp) -; Inst 4: movq %r13, 8(%rsp) -; Inst 5: movq %r14, 16(%rsp) -; Inst 6: movq %rbx, 24(%rsp) -; Inst 7: movq %rsi, %rax -; Inst 8: movq %rdx, %rsi -; Inst 9: movq %rdi, %r12 -; Inst 10: movq %rdi, %rcx -; Inst 11: movq %rax, %rdx -; Inst 12: movq %rdi, %r8 -; Inst 13: movq %rax, %r9 -; Inst 14: movq %rdi, %r10 -; Inst 15: movq %rax, %r11 -; Inst 16: movq %rdi, %r13 -; Inst 17: movq %rax, %r14 -; Inst 18: movq %rax, %rbx -; Inst 19: movq %rcx, %rax -; Inst 20: movq %r8, 0(%rsi) -; Inst 21: movq %r9, 8(%rsi) -; Inst 22: movq %r10, 16(%rsi) -; Inst 23: movq %r11, 24(%rsi) -; Inst 24: movq %r12, 32(%rsi) -; Inst 25: movq %r13, 40(%rsi) -; Inst 26: movq %r14, 48(%rsi) -; Inst 27: movq %rdi, 56(%rsi) -; Inst 28: movq %rbx, 64(%rsi) -; Inst 29: movq 0(%rsp), %r12 -; Inst 30: movq 8(%rsp), %r13 -; Inst 31: movq 16(%rsp), %r14 -; Inst 32: movq 24(%rsp), %rbx -; Inst 33: addq $32, %rsp -; Inst 34: movq %rbp, %rsp -; Inst 35: popq %rbp -; Inst 36: ret -; }} +; pushq %rbp +; movq %rsp, %rbp +; subq %rsp, $32, %rsp +; movq %rbx, 0(%rsp) +; movq %r13, 8(%rsp) +; movq %r14, 16(%rsp) +; movq %r15, 24(%rsp) +; block0: +; movq %rdx, %r14 +; movq %rdi, %rax +; movq %rsi, %rdx +; movq %rdi, %rbx +; movq %rsi, %r13 +; movq %rdi, %r15 +; movq %rsi, %r11 +; movq %rdi, %rcx +; movq %rdi, %r8 +; movq %rsi, %r9 +; movq %rbx, 0(%r14) +; movq %r13, 8(%r14) +; movq %r15, 16(%r14) +; movq %r11, 24(%r14) +; movq %rcx, 32(%r14) +; movq %r8, 40(%r14) +; movq %r9, 48(%r14) +; movq %rdi, 56(%r14) +; movq %rsi, 64(%r14) +; movq 0(%rsp), %rbx +; movq 8(%rsp), %r13 +; movq 16(%rsp), %r14 +; movq 24(%rsp), %r15 +; addq %rsp, $32, %rsp +; movq %rbp, %rsp +; popq %rbp +; ret function %f26(i128, i128) -> i128, i128 { fn0 = %g(i128, i128) -> i128, i128 @@ -961,33 +790,29 @@ block0(v0: i128, v1: i128): return v2, v3 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 21) -; Inst 0: pushq %rbp -; Inst 1: movq %rsp, %rbp -; Inst 2: subq $16, %rsp -; Inst 3: movq %r12, 0(%rsp) -; Inst 4: movq %r8, %r12 -; Inst 5: subq $16, %rsp -; Inst 6: virtual_sp_offset_adjust 16 -; Inst 7: lea 0(%rsp), %r8 -; Inst 8: load_ext_name %g+0, %rax -; Inst 9: call *%rax -; Inst 10: movq 0(%rsp), %rsi -; Inst 11: movq 8(%rsp), %rdi -; Inst 12: addq $16, %rsp -; Inst 13: virtual_sp_offset_adjust -16 -; Inst 14: movq %rsi, 0(%r12) -; Inst 15: movq %rdi, 8(%r12) -; Inst 16: movq 0(%rsp), %r12 -; Inst 17: addq $16, %rsp -; Inst 18: movq %rbp, %rsp -; Inst 19: popq %rbp -; Inst 20: ret -; }} +; pushq %rbp +; movq %rsp, %rbp +; subq %rsp, $16, %rsp +; movq %r12, 0(%rsp) +; block0: +; movq %r8, %r12 +; subq %rsp, $16, %rsp +; virtual_sp_offset_adjust 16 +; lea 0(%rsp), %r8 +; load_ext_name %g+0, %r9 +; call *%r9 +; movq 0(%rsp), %r11 +; movq 8(%rsp), %rdi +; addq %rsp, $16, %rsp +; virtual_sp_offset_adjust -16 +; movq %r12, %r8 +; movq %r11, 0(%r8) +; movq %rdi, 8(%r8) +; movq 0(%rsp), %r12 +; addq %rsp, $16, %rsp +; movq %rbp, %rsp +; popq %rbp +; ret function %f27(i128) -> i128 { block0(v0: i128): @@ -995,33 +820,26 @@ block0(v0: i128): return v1 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 
21) -; Inst 0: pushq %rbp -; Inst 1: movq %rsp, %rbp -; Inst 2: movabsq $-1, %rcx -; Inst 3: bsrq %rsi, %rax -; Inst 4: cmovzq %rcx, %rax -; Inst 5: movl $63, %esi -; Inst 6: subq %rax, %rsi -; Inst 7: movabsq $-1, %rax -; Inst 8: bsrq %rdi, %rcx -; Inst 9: cmovzq %rax, %rcx -; Inst 10: movl $63, %edi -; Inst 11: subq %rcx, %rdi -; Inst 12: addq $64, %rdi -; Inst 13: cmpq $64, %rsi -; Inst 14: cmovnzq %rsi, %rdi -; Inst 15: xorq %rsi, %rsi -; Inst 16: movq %rdi, %rax -; Inst 17: movq %rsi, %rdx -; Inst 18: movq %rbp, %rsp -; Inst 19: popq %rbp -; Inst 20: ret -; }} +; pushq %rbp +; movq %rsp, %rbp +; block0: +; movabsq $-1, %r9 +; bsrq %rsi, %rax +; cmovzq %r9, %rax, %rax +; movl $63, %r8d +; subq %r8, %rax, %r8 +; movabsq $-1, %rsi +; bsrq %rdi, %rcx +; cmovzq %rsi, %rcx, %rcx +; movl $63, %eax +; subq %rax, %rcx, %rax +; addq %rax, $64, %rax +; cmpq $64, %r8 +; cmovnzq %r8, %rax, %rax +; xorq %rdx, %rdx, %rdx +; movq %rbp, %rsp +; popq %rbp +; ret function %f28(i128) -> i128 { block0(v0: i128): @@ -1029,30 +847,22 @@ block0(v0: i128): return v1 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 18) -; Inst 0: pushq %rbp -; Inst 1: movq %rsp, %rbp -; Inst 2: movq %rsi, %rax -; Inst 3: movl $64, %ecx -; Inst 4: bsfq %rdi, %rsi -; Inst 5: cmovzq %rcx, %rsi -; Inst 6: movl $64, %ecx -; Inst 7: bsfq %rax, %rdi -; Inst 8: cmovzq %rcx, %rdi -; Inst 9: addq $64, %rdi -; Inst 10: cmpq $64, %rsi -; Inst 11: cmovzq %rdi, %rsi -; Inst 12: xorq %rdi, %rdi -; Inst 13: movq %rsi, %rax -; Inst 14: movq %rdi, %rdx -; Inst 15: movq %rbp, %rsp -; Inst 16: popq %rbp -; Inst 17: ret -; }} +; pushq %rbp +; movq %rsp, %rbp +; block0: +; movl $64, %r9d +; bsfq %rdi, %rax +; cmovzq %r9, %rax, %rax +; movl $64, %edx +; bsfq %rsi, %rsi +; cmovzq %rdx, %rsi, %rsi +; addq %rsi, $64, %rsi +; cmpq $64, %rax +; cmovzq %rsi, %rax, %rax +; xorq %rdx, %rdx, %rdx +; movq %rbp, %rsp +; popq %rbp +; ret function %f29(i8, i128) -> i8 { block0(v0: i8, v1: i128): @@ -1060,20 +870,16 @@ block0(v0: i8, v1: i128): return v2 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 8) -; Inst 0: pushq %rbp -; Inst 1: movq %rsp, %rbp -; Inst 2: movq %rsi, %rcx -; Inst 3: shlb %cl, %dil -; Inst 4: movq %rdi, %rax -; Inst 5: movq %rbp, %rsp -; Inst 6: popq %rbp -; Inst 7: ret -; }} +; pushq %rbp +; movq %rsp, %rbp +; block0: +; movq %rsi, %r9 +; movq %r9, %rcx +; shlb %cl, %dil, %dil +; movq %rdi, %rax +; movq %rbp, %rsp +; popq %rbp +; ret function %f30(i128, i128) -> i128 { block0(v0: i128, v1: i128): @@ -1081,36 +887,27 @@ block0(v0: i128, v1: i128): return v2 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 
24) -; Inst 0: pushq %rbp -; Inst 1: movq %rsp, %rbp -; Inst 2: movq %rdi, %rax -; Inst 3: movq %rsi, %rdi -; Inst 4: movq %rax, %rsi -; Inst 5: movq %rdx, %rcx -; Inst 6: shlq %cl, %rsi -; Inst 7: movq %rdx, %rcx -; Inst 8: shlq %cl, %rdi -; Inst 9: movl $64, %ecx -; Inst 10: subq %rdx, %rcx -; Inst 11: shrq %cl, %rax -; Inst 12: xorq %rcx, %rcx -; Inst 13: testq $127, %rdx -; Inst 14: cmovzq %rcx, %rax -; Inst 15: orq %rdi, %rax -; Inst 16: testq $64, %rdx -; Inst 17: cmovzq %rsi, %rcx -; Inst 18: cmovzq %rax, %rsi -; Inst 19: movq %rcx, %rax -; Inst 20: movq %rsi, %rdx -; Inst 21: movq %rbp, %rsp -; Inst 22: popq %rbp -; Inst 23: ret -; }} +; pushq %rbp +; movq %rsp, %rbp +; block0: +; movq %rdx, %r8 +; movq %r8, %rcx +; movq %rdi, %rdx +; shlq %cl, %rdx, %rdx +; shlq %cl, %rsi, %rsi +; movl $64, %ecx +; subq %rcx, %r8, %rcx +; shrq %cl, %rdi, %rdi +; xorq %rax, %rax, %rax +; testq $127, %r8 +; cmovzq %rax, %rdi, %rdi +; orq %rdi, %rsi, %rdi +; testq $64, %r8 +; cmovzq %rdx, %rax, %rax +; cmovzq %rdi, %rdx, %rdx +; movq %rbp, %rsp +; popq %rbp +; ret function %f31(i128, i128) -> i128 { block0(v0: i128, v1: i128): @@ -1118,37 +915,29 @@ block0(v0: i128, v1: i128): return v2 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 25) -; Inst 0: pushq %rbp -; Inst 1: movq %rsp, %rbp -; Inst 2: movq %rsi, %rax -; Inst 3: movq %rdx, %rcx -; Inst 4: shrq %cl, %rdi -; Inst 5: movq %rax, %rsi -; Inst 6: movq %rdx, %rcx -; Inst 7: shrq %cl, %rsi -; Inst 8: movl $64, %ecx -; Inst 9: subq %rdx, %rcx -; Inst 10: shlq %cl, %rax -; Inst 11: xorq %rcx, %rcx -; Inst 12: testq $127, %rdx -; Inst 13: cmovzq %rcx, %rax -; Inst 14: orq %rdi, %rax -; Inst 15: xorq %rcx, %rcx -; Inst 16: testq $64, %rdx -; Inst 17: movq %rsi, %rdi -; Inst 18: cmovzq %rax, %rdi -; Inst 19: cmovzq %rsi, %rcx -; Inst 20: movq %rdi, %rax -; Inst 21: movq %rcx, %rdx -; Inst 22: movq %rbp, %rsp -; Inst 23: popq %rbp -; Inst 24: ret -; }} +; pushq %rbp +; movq %rsp, %rbp +; block0: +; movq %rdx, %r9 +; movq %r9, %rcx +; shrq %cl, %rdi, %rdi +; movq %rsi, %r8 +; shrq %cl, %r8, %r8 +; movl $64, %ecx +; subq %rcx, %r9, %rcx +; shlq %cl, %rsi, %rsi +; xorq %rdx, %rdx, %rdx +; testq $127, %r9 +; cmovzq %rdx, %rsi, %rsi +; orq %rsi, %rdi, %rsi +; xorq %rdx, %rdx, %rdx +; testq $64, %r9 +; movq %r8, %rax +; cmovzq %rsi, %rax, %rax +; cmovzq %r8, %rdx, %rdx +; movq %rbp, %rsp +; popq %rbp +; ret function %f32(i128, i128) -> i128 { block0(v0: i128, v1: i128): @@ -1156,38 +945,32 @@ block0(v0: i128, v1: i128): return v2 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 
26) -; Inst 0: pushq %rbp -; Inst 1: movq %rsp, %rbp -; Inst 2: movq %rdi, %rax -; Inst 3: movq %rdx, %rcx -; Inst 4: shrq %cl, %rax -; Inst 5: movq %rsi, %rdi -; Inst 6: movq %rdx, %rcx -; Inst 7: sarq %cl, %rdi -; Inst 8: movl $64, %ecx -; Inst 9: subq %rdx, %rcx -; Inst 10: movq %rsi, %r8 -; Inst 11: shlq %cl, %r8 -; Inst 12: xorq %rcx, %rcx -; Inst 13: testq $127, %rdx -; Inst 14: cmovzq %rcx, %r8 -; Inst 15: orq %r8, %rax -; Inst 16: sarq $63, %rsi -; Inst 17: testq $64, %rdx -; Inst 18: movq %rdi, %rcx -; Inst 19: cmovzq %rax, %rcx -; Inst 20: cmovzq %rdi, %rsi -; Inst 21: movq %rcx, %rax -; Inst 22: movq %rsi, %rdx -; Inst 23: movq %rbp, %rsp -; Inst 24: popq %rbp -; Inst 25: ret -; }} +; pushq %rbp +; movq %rsp, %rbp +; block0: +; movq %rdx, %r8 +; movq %r8, %rcx +; shrq %cl, %rdi, %rdi +; movq %rsi, %rdx +; sarq %cl, %rdx, %rdx +; movl $64, %ecx +; movq %r8, %r11 +; subq %rcx, %r11, %rcx +; movq %rsi, %r8 +; shlq %cl, %r8, %r8 +; xorq %r9, %r9, %r9 +; testq $127, %r11 +; cmovzq %r9, %r8, %r8 +; orq %rdi, %r8, %rdi +; sarq $63, %rsi, %rsi +; testq $64, %r11 +; movq %rdx, %rax +; cmovzq %rdi, %rax, %rax +; cmovzq %rdx, %rsi, %rsi +; movq %rsi, %rdx +; movq %rbp, %rsp +; popq %rbp +; ret function %f33(i128, i128) -> i128 { block0(v0: i128, v1: i128): @@ -1195,60 +978,50 @@ block0(v0: i128, v1: i128): return v2 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 48) -; Inst 0: pushq %rbp -; Inst 1: movq %rsp, %rbp -; Inst 2: movq %rdi, %rax -; Inst 3: movq %rdx, %rcx -; Inst 4: shlq %cl, %rax -; Inst 5: movq %rsi, %r8 -; Inst 6: movq %rdx, %rcx -; Inst 7: shlq %cl, %r8 -; Inst 8: movl $64, %ecx -; Inst 9: subq %rdx, %rcx -; Inst 10: movq %rdi, %r9 -; Inst 11: shrq %cl, %r9 -; Inst 12: xorq %rcx, %rcx -; Inst 13: testq $127, %rdx -; Inst 14: cmovzq %rcx, %r9 -; Inst 15: orq %r8, %r9 -; Inst 16: testq $64, %rdx -; Inst 17: movq %rcx, %r8 -; Inst 18: cmovzq %rax, %r8 -; Inst 19: cmovzq %r9, %rax -; Inst 20: movl $128, %r9d -; Inst 21: subq %rdx, %r9 -; Inst 22: movq %rdi, %rdx -; Inst 23: movq %r9, %rcx -; Inst 24: shrq %cl, %rdx -; Inst 25: movq %rsi, %rdi -; Inst 26: movq %r9, %rcx -; Inst 27: shrq %cl, %rdi -; Inst 28: movl $64, %ecx -; Inst 29: subq %r9, %rcx -; Inst 30: shlq %cl, %rsi -; Inst 31: xorq %rcx, %rcx -; Inst 32: testq $127, %r9 -; Inst 33: cmovzq %rcx, %rsi -; Inst 34: orq %rdx, %rsi -; Inst 35: xorq %rdx, %rdx -; Inst 36: testq $64, %r9 -; Inst 37: movq %rdi, %rcx -; Inst 38: cmovzq %rsi, %rcx -; Inst 39: movq %rdx, %rsi -; Inst 40: cmovzq %rdi, %rsi -; Inst 41: orq %rcx, %r8 -; Inst 42: orq %rsi, %rax -; Inst 43: movq %rax, %rdx -; Inst 44: movq %r8, %rax -; Inst 45: movq %rbp, %rsp -; Inst 46: popq %rbp -; Inst 47: ret -; }} +; pushq %rbp +; movq %rsp, %rbp +; block0: +; movq %rdx, %r11 +; movq %r11, %rcx +; movq %rdi, %rdx +; shlq %cl, %rdx, %rdx +; movq %rsi, %r9 +; shlq %cl, %r9, %r9 +; movl $64, %ecx +; subq %rcx, %r11, %rcx +; movq %rdi, %r8 +; shrq %cl, %r8, %r8 +; xorq %rax, %rax, %rax +; testq $127, %r11 +; cmovzq %rax, %r8, %r8 +; orq %r8, %r9, %r8 +; testq $64, %r11 +; cmovzq %rdx, %rax, %rax +; cmovzq %r8, %rdx, %rdx +; movl $128, %ecx +; movq %r11, %r8 +; subq %rcx, %r8, %rcx +; shrq %cl, %rdi, %rdi +; movq %rsi, %r9 +; shrq %cl, %r9, %r9 +; movq %rcx, %r8 +; movl $64, %ecx +; subq %rcx, %r8, %rcx +; shlq %cl, %rsi, %rsi +; xorq %r11, %r11, %r11 +; testq $127, %r8 +; cmovzq %r11, %rsi, %rsi +; orq %rsi, %rdi, %rsi +; xorq %r11, %r11, %r11 +; testq $64, %r8 +; movq %r9, %rdi +; cmovzq %rsi, 
%rdi, %rdi +; cmovzq %r9, %r11, %r11 +; orq %rax, %rdi, %rax +; orq %rdx, %r11, %rdx +; movq %rbp, %rsp +; popq %rbp +; ret function %f34(i128, i128) -> i128 { block0(v0: i128, v1: i128): @@ -1256,55 +1029,49 @@ block0(v0: i128, v1: i128): return v2 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 45) -; Inst 0: pushq %rbp -; Inst 1: movq %rsp, %rbp -; Inst 2: movq %rdi, %rax -; Inst 3: movq %rdx, %rcx -; Inst 4: shrq %cl, %rax -; Inst 5: movq %rsi, %r9 -; Inst 6: movq %rdx, %rcx -; Inst 7: shrq %cl, %r9 -; Inst 8: movl $64, %ecx -; Inst 9: subq %rdx, %rcx -; Inst 10: movq %rsi, %r8 -; Inst 11: shlq %cl, %r8 -; Inst 12: xorq %rcx, %rcx -; Inst 13: testq $127, %rdx -; Inst 14: cmovzq %rcx, %r8 -; Inst 15: orq %rax, %r8 -; Inst 16: xorq %rcx, %rcx -; Inst 17: testq $64, %rdx -; Inst 18: movq %r9, %rax -; Inst 19: cmovzq %r8, %rax -; Inst 20: movq %rcx, %r8 -; Inst 21: cmovzq %r9, %r8 -; Inst 22: movl $128, %r9d -; Inst 23: subq %rdx, %r9 -; Inst 24: movq %rdi, %rdx -; Inst 25: movq %r9, %rcx -; Inst 26: shlq %cl, %rdx -; Inst 27: movq %r9, %rcx -; Inst 28: shlq %cl, %rsi -; Inst 29: movl $64, %ecx -; Inst 30: subq %r9, %rcx -; Inst 31: shrq %cl, %rdi -; Inst 32: xorq %rcx, %rcx -; Inst 33: testq $127, %r9 -; Inst 34: cmovzq %rcx, %rdi -; Inst 35: orq %rsi, %rdi -; Inst 36: testq $64, %r9 -; Inst 37: cmovzq %rdx, %rcx -; Inst 38: cmovzq %rdi, %rdx -; Inst 39: orq %rcx, %rax -; Inst 40: orq %rdx, %r8 -; Inst 41: movq %r8, %rdx -; Inst 42: movq %rbp, %rsp -; Inst 43: popq %rbp -; Inst 44: ret -; }} +; pushq %rbp +; movq %rsp, %rbp +; block0: +; movq %rdx, %r11 +; movq %r11, %rcx +; movq %rdi, %rax +; shrq %cl, %rax, %rax +; movq %rsi, %r8 +; shrq %cl, %r8, %r8 +; movl $64, %ecx +; subq %rcx, %r11, %rcx +; movq %rsi, %r9 +; shlq %cl, %r9, %r9 +; xorq %rdx, %rdx, %rdx +; testq $127, %r11 +; cmovzq %rdx, %r9, %r9 +; orq %r9, %rax, %r9 +; xorq %rdx, %rdx, %rdx +; testq $64, %r11 +; movq %r8, %rax +; cmovzq %r9, %rax, %rax +; cmovzq %r8, %rdx, %rdx +; movl $128, %ecx +; movq %r11, %r8 +; subq %rcx, %r8, %rcx +; movq %rdi, %r11 +; shlq %cl, %r11, %r11 +; shlq %cl, %rsi, %rsi +; movq %rcx, %r8 +; movl $64, %ecx +; movq %r8, %r9 +; subq %rcx, %r9, %rcx +; shrq %cl, %rdi, %rdi +; xorq %r8, %r8, %r8 +; testq $127, %r9 +; cmovzq %r8, %rdi, %rdi +; orq %rdi, %rsi, %rdi +; testq $64, %r9 +; cmovzq %r11, %r8, %r8 +; cmovzq %rdi, %r11, %r11 +; orq %rax, %r8, %rax +; orq %rdx, %r11, %rdx +; movq %rbp, %rsp +; popq %rbp +; ret diff --git a/cranelift/filetests/filetests/isa/x64/ifcmp-sp.clif b/cranelift/filetests/filetests/isa/x64/ifcmp-sp.clif index 0a1c8ac7c6..b3c82b4460 100644 --- a/cranelift/filetests/filetests/isa/x64/ifcmp-sp.clif +++ b/cranelift/filetests/filetests/isa/x64/ifcmp-sp.clif @@ -16,30 +16,19 @@ block2: return v3 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (successor: Block 1) -; (successor: Block 2) -; (instruction range: 0 .. 4) -; Inst 0: pushq %rbp -; Inst 1: movq %rsp, %rbp -; Inst 2: cmpq %rsp, %rdi -; Inst 3: jnbe label1; j label2 -; Block 1: -; (original IR block: block1) -; (instruction range: 4 .. 8) -; Inst 4: xorl %eax, %eax -; Inst 5: movq %rbp, %rsp -; Inst 6: popq %rbp -; Inst 7: ret -; Block 2: -; (original IR block: block2) -; (instruction range: 8 .. 
12) -; Inst 8: movl $1, %eax -; Inst 9: movq %rbp, %rsp -; Inst 10: popq %rbp -; Inst 11: ret -; }} +; pushq %rbp +; movq %rsp, %rbp +; block0: +; cmpq %rsp, %rdi +; jnbe label1; j label2 +; block1: +; xorl %eax, %eax, %eax +; movq %rbp, %rsp +; popq %rbp +; ret +; block2: +; movl $1, %eax +; movq %rbp, %rsp +; popq %rbp +; ret diff --git a/cranelift/filetests/filetests/isa/x64/load-op.clif b/cranelift/filetests/filetests/isa/x64/load-op.clif index 85c26dec3e..d2fa2b9f82 100644 --- a/cranelift/filetests/filetests/isa/x64/load-op.clif +++ b/cranelift/filetests/filetests/isa/x64/load-op.clif @@ -5,7 +5,7 @@ function %add_from_mem_u32_1(i64, i32) -> i32 { block0(v0: i64, v1: i32): v2 = load.i32 v0 v3 = iadd.i32 v2, v1 - ; check: addl 0(%rdi), %esi + ; check: addl %esi, 0(%rdi), %esi return v3 } @@ -13,7 +13,7 @@ function %add_from_mem_u32_2(i64, i32) -> i32 { block0(v0: i64, v1: i32): v2 = load.i32 v0 v3 = iadd.i32 v1, v2 - ; check: addl 0(%rdi), %esi + ; check: addl %esi, 0(%rdi), %esi return v3 } @@ -21,7 +21,7 @@ function %add_from_mem_u64_1(i64, i64) -> i64 { block0(v0: i64, v1: i64): v2 = load.i64 v0 v3 = iadd.i64 v2, v1 - ; check: addq 0(%rdi), %rsi + ; check: addq %rsi, 0(%rdi), %rsi return v3 } @@ -29,7 +29,7 @@ function %add_from_mem_u64_2(i64, i64) -> i64 { block0(v0: i64, v1: i64): v2 = load.i64 v0 v3 = iadd.i64 v1, v2 - ; check: addq 0(%rdi), %rsi + ; check: addq %rsi, 0(%rdi), %rsi return v3 } @@ -39,8 +39,8 @@ function %add_from_mem_not_narrow(i64, i8) -> i8 { block0(v0: i64, v1: i8): v2 = load.i8 v0 v3 = iadd.i8 v2, v1 - ; check: movzbq 0(%rdi), %rdi - ; nextln: addl %esi, %edi + ; check: movzbq 0(%rdi), %rax + ; nextln: addl %eax, %esi, %eax return v3 } @@ -51,12 +51,11 @@ block0(v0: i64, v1: i64): store.i64 v3, v1 v4 = load.i64 v3 return v4 - ; check: movq 0(%rdi), %rax - ; nextln: movq %rax, %rcx - ; nextln: addq %rdi, %rcx - ; nextln: movq %rcx, 0(%rsi) - ; nextln: movq 0(%rax,%rdi,1), %rsi - ; nextln: movq %rsi, %rax + ; check: movq 0(%rdi), %r11 + ; nextln: movq %r11, %rax + ; nextln: addq %rax, %rdi, %rax + ; nextln: movq %rax, 0(%rsi) + ; nextln: movq 0(%r11,%rdi,1), %rax } function %merge_scalar_to_vector(i64) -> i32x4 { diff --git a/cranelift/filetests/filetests/isa/x64/move-elision.clif b/cranelift/filetests/filetests/isa/x64/move-elision.clif index 45e631390c..af16a95c83 100644 --- a/cranelift/filetests/filetests/isa/x64/move-elision.clif +++ b/cranelift/filetests/filetests/isa/x64/move-elision.clif @@ -13,15 +13,10 @@ block0(v0: i32x4): return v3 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 5) -; Inst 0: pushq %rbp -; Inst 1: movq %rsp, %rbp -; Inst 2: movq %rbp, %rsp -; Inst 3: popq %rbp -; Inst 4: ret -; }} +; pushq %rbp +; movq %rsp, %rbp +; block0: +; movq %rbp, %rsp +; popq %rbp +; ret diff --git a/cranelift/filetests/filetests/isa/x64/popcnt-use-popcnt.clif b/cranelift/filetests/filetests/isa/x64/popcnt-use-popcnt.clif index e4e7c6b828..09309733a9 100644 --- a/cranelift/filetests/filetests/isa/x64/popcnt-use-popcnt.clif +++ b/cranelift/filetests/filetests/isa/x64/popcnt-use-popcnt.clif @@ -7,19 +7,13 @@ block0(v0: i64): return v1 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 
7) -; Inst 0: pushq %rbp -; Inst 1: movq %rsp, %rbp -; Inst 2: popcntq %rdi, %rsi -; Inst 3: movq %rsi, %rax -; Inst 4: movq %rbp, %rsp -; Inst 5: popq %rbp -; Inst 6: ret -; }} +; pushq %rbp +; movq %rsp, %rbp +; block0: +; popcntq %rdi, %rax +; movq %rbp, %rsp +; popq %rbp +; ret function %popcnt(i32) -> i32 { block0(v0: i32): @@ -27,17 +21,11 @@ block0(v0: i32): return v1 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 7) -; Inst 0: pushq %rbp -; Inst 1: movq %rsp, %rbp -; Inst 2: popcntl %edi, %esi -; Inst 3: movq %rsi, %rax -; Inst 4: movq %rbp, %rsp -; Inst 5: popq %rbp -; Inst 6: ret -; }} +; pushq %rbp +; movq %rsp, %rbp +; block0: +; popcntl %edi, %eax +; movq %rbp, %rsp +; popq %rbp +; ret diff --git a/cranelift/filetests/filetests/isa/x64/popcnt.clif b/cranelift/filetests/filetests/isa/x64/popcnt.clif index 549fa9c4bf..94582c1d12 100644 --- a/cranelift/filetests/filetests/isa/x64/popcnt.clif +++ b/cranelift/filetests/filetests/isa/x64/popcnt.clif @@ -7,37 +7,31 @@ block0(v0: i64): return v1 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 25) -; Inst 0: pushq %rbp -; Inst 1: movq %rsp, %rbp -; Inst 2: movq %rdi, %rax -; Inst 3: shrq $1, %rax -; Inst 4: movabsq $8608480567731124087, %rsi -; Inst 5: andq %rsi, %rax -; Inst 6: subq %rax, %rdi -; Inst 7: shrq $1, %rax -; Inst 8: andq %rsi, %rax -; Inst 9: subq %rax, %rdi -; Inst 10: shrq $1, %rax -; Inst 11: andq %rsi, %rax -; Inst 12: subq %rax, %rdi -; Inst 13: movq %rdi, %rsi -; Inst 14: shrq $4, %rsi -; Inst 15: addq %rdi, %rsi -; Inst 16: movabsq $1085102592571150095, %rdi -; Inst 17: andq %rdi, %rsi -; Inst 18: movabsq $72340172838076673, %rdi -; Inst 19: imulq %rdi, %rsi -; Inst 20: shrq $56, %rsi -; Inst 21: movq %rsi, %rax -; Inst 22: movq %rbp, %rsp -; Inst 23: popq %rbp -; Inst 24: ret -; }} +; pushq %rbp +; movq %rsp, %rbp +; block0: +; movq %rdi, %rcx +; shrq $1, %rcx, %rcx +; movabsq $8608480567731124087, %r8 +; andq %rcx, %r8, %rcx +; subq %rdi, %rcx, %rdi +; shrq $1, %rcx, %rcx +; andq %rcx, %r8, %rcx +; subq %rdi, %rcx, %rdi +; shrq $1, %rcx, %rcx +; andq %rcx, %r8, %rcx +; subq %rdi, %rcx, %rdi +; movq %rdi, %rax +; shrq $4, %rax, %rax +; addq %rax, %rdi, %rax +; movabsq $1085102592571150095, %rcx +; andq %rax, %rcx, %rax +; movabsq $72340172838076673, %r11 +; imulq %rax, %r11, %rax +; shrq $56, %rax, %rax +; movq %rbp, %rsp +; popq %rbp +; ret function %popcnt64load(i64) -> i64 { block0(v0: i64): @@ -46,38 +40,32 @@ block0(v0: i64): return v2 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 
26) -; Inst 0: pushq %rbp -; Inst 1: movq %rsp, %rbp -; Inst 2: movq 0(%rdi), %rdi -; Inst 3: movq %rdi, %rax -; Inst 4: shrq $1, %rax -; Inst 5: movabsq $8608480567731124087, %rsi -; Inst 6: andq %rsi, %rax -; Inst 7: subq %rax, %rdi -; Inst 8: shrq $1, %rax -; Inst 9: andq %rsi, %rax -; Inst 10: subq %rax, %rdi -; Inst 11: shrq $1, %rax -; Inst 12: andq %rsi, %rax -; Inst 13: subq %rax, %rdi -; Inst 14: movq %rdi, %rsi -; Inst 15: shrq $4, %rsi -; Inst 16: addq %rdi, %rsi -; Inst 17: movabsq $1085102592571150095, %rdi -; Inst 18: andq %rdi, %rsi -; Inst 19: movabsq $72340172838076673, %rdi -; Inst 20: imulq %rdi, %rsi -; Inst 21: shrq $56, %rsi -; Inst 22: movq %rsi, %rax -; Inst 23: movq %rbp, %rsp -; Inst 24: popq %rbp -; Inst 25: ret -; }} +; pushq %rbp +; movq %rsp, %rbp +; block0: +; movq 0(%rdi), %rdx +; movq %rdx, %r8 +; shrq $1, %r8, %r8 +; movabsq $8608480567731124087, %r9 +; andq %r8, %r9, %r8 +; subq %rdx, %r8, %rdx +; shrq $1, %r8, %r8 +; andq %r8, %r9, %r8 +; subq %rdx, %r8, %rdx +; shrq $1, %r8, %r8 +; andq %r8, %r9, %r8 +; subq %rdx, %r8, %rdx +; movq %rdx, %rax +; shrq $4, %rax, %rax +; addq %rax, %rdx, %rax +; movabsq $1085102592571150095, %rdx +; andq %rax, %rdx, %rax +; movabsq $72340172838076673, %rsi +; imulq %rax, %rsi, %rax +; shrq $56, %rax, %rax +; movq %rbp, %rsp +; popq %rbp +; ret function %popcnt32(i32) -> i32 { block0(v0: i32): @@ -85,35 +73,29 @@ block0(v0: i32): return v1 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 23) -; Inst 0: pushq %rbp -; Inst 1: movq %rsp, %rbp -; Inst 2: movq %rdi, %rax -; Inst 3: shrl $1, %eax -; Inst 4: movl $2004318071, %esi -; Inst 5: andl %esi, %eax -; Inst 6: subl %eax, %edi -; Inst 7: shrl $1, %eax -; Inst 8: andl %esi, %eax -; Inst 9: subl %eax, %edi -; Inst 10: shrl $1, %eax -; Inst 11: andl %esi, %eax -; Inst 12: subl %eax, %edi -; Inst 13: movq %rdi, %rsi -; Inst 14: shrl $4, %esi -; Inst 15: addl %edi, %esi -; Inst 16: andl $252645135, %esi -; Inst 17: imull $16843009, %esi -; Inst 18: shrl $24, %esi -; Inst 19: movq %rsi, %rax -; Inst 20: movq %rbp, %rsp -; Inst 21: popq %rbp -; Inst 22: ret -; }} +; pushq %rbp +; movq %rsp, %rbp +; block0: +; movq %rdi, %rcx +; shrl $1, %ecx, %ecx +; movl $2004318071, %r8d +; andl %ecx, %r8d, %ecx +; subl %edi, %ecx, %edi +; shrl $1, %ecx, %ecx +; andl %ecx, %r8d, %ecx +; subl %edi, %ecx, %edi +; shrl $1, %ecx, %ecx +; andl %ecx, %r8d, %ecx +; subl %edi, %ecx, %edi +; movq %rdi, %rax +; shrl $4, %eax, %eax +; addl %eax, %edi, %eax +; andl %eax, $252645135, %eax +; imull %eax, $16843009, %eax +; shrl $24, %eax, %eax +; movq %rbp, %rsp +; popq %rbp +; ret function %popcnt32load(i64) -> i32 { block0(v0: i64): @@ -122,34 +104,28 @@ block0(v0: i64): return v2 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 
24) -; Inst 0: pushq %rbp -; Inst 1: movq %rsp, %rbp -; Inst 2: movl 0(%rdi), %edi -; Inst 3: movq %rdi, %rax -; Inst 4: shrl $1, %eax -; Inst 5: movl $2004318071, %esi -; Inst 6: andl %esi, %eax -; Inst 7: subl %eax, %edi -; Inst 8: shrl $1, %eax -; Inst 9: andl %esi, %eax -; Inst 10: subl %eax, %edi -; Inst 11: shrl $1, %eax -; Inst 12: andl %esi, %eax -; Inst 13: subl %eax, %edi -; Inst 14: movq %rdi, %rsi -; Inst 15: shrl $4, %esi -; Inst 16: addl %edi, %esi -; Inst 17: andl $252645135, %esi -; Inst 18: imull $16843009, %esi -; Inst 19: shrl $24, %esi -; Inst 20: movq %rsi, %rax -; Inst 21: movq %rbp, %rsp -; Inst 22: popq %rbp -; Inst 23: ret -; }} +; pushq %rbp +; movq %rsp, %rbp +; block0: +; movl 0(%rdi), %edx +; movq %rdx, %r8 +; shrl $1, %r8d, %r8d +; movl $2004318071, %r9d +; andl %r8d, %r9d, %r8d +; subl %edx, %r8d, %edx +; shrl $1, %r8d, %r8d +; andl %r8d, %r9d, %r8d +; subl %edx, %r8d, %edx +; shrl $1, %r8d, %r8d +; andl %r8d, %r9d, %r8d +; subl %edx, %r8d, %edx +; movq %rdx, %rax +; shrl $4, %eax, %eax +; addl %eax, %edx, %eax +; andl %eax, $252645135, %eax +; imull %eax, $16843009, %eax +; shrl $24, %eax, %eax +; movq %rbp, %rsp +; popq %rbp +; ret diff --git a/cranelift/filetests/filetests/isa/x64/probestack.clif b/cranelift/filetests/filetests/isa/x64/probestack.clif index 586152b192..d00509e318 100644 --- a/cranelift/filetests/filetests/isa/x64/probestack.clif +++ b/cranelift/filetests/filetests/isa/x64/probestack.clif @@ -10,21 +10,15 @@ block0: return v1 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 11) -; Inst 0: pushq %rbp -; Inst 1: movq %rsp, %rbp -; Inst 2: movl $100000, %eax -; Inst 3: call LibCall(Probestack) -; Inst 4: subq $100000, %rsp -; Inst 5: lea rsp(0 + virtual offset), %rsi -; Inst 6: movq %rsi, %rax -; Inst 7: addq $100000, %rsp -; Inst 8: movq %rbp, %rsp -; Inst 9: popq %rbp -; Inst 10: ret -; }} +; pushq %rbp +; movq %rsp, %rbp +; movl $100000, %eax +; call LibCall(Probestack) +; subq %rsp, $100000, %rsp +; block0: +; lea rsp(0 + virtual offset), %rax +; addq %rsp, $100000, %rsp +; movq %rbp, %rsp +; popq %rbp +; ret diff --git a/cranelift/filetests/filetests/isa/x64/select-i128.clif b/cranelift/filetests/filetests/isa/x64/select-i128.clif index c6b80bd345..5317a22872 100644 --- a/cranelift/filetests/filetests/isa/x64/select-i128.clif +++ b/cranelift/filetests/filetests/isa/x64/select-i128.clif @@ -10,25 +10,19 @@ block0(v0: i32, v1: i128, v2: i128): return v5 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 11) -; Inst 0: pushq %rbp -; Inst 1: movq %rsp, %rbp -; Inst 2: movl $42, %eax -; Inst 3: cmpl %eax, %edi -; Inst 4: cmovzq %rsi, %rcx -; Inst 5: cmovzq %rdx, %r8 -; Inst 6: movq %rcx, %rax -; Inst 7: movq %r8, %rdx -; Inst 8: movq %rbp, %rsp -; Inst 9: popq %rbp -; Inst 10: ret -; }} +; pushq %rbp +; movq %rsp, %rbp +; block0: +; movl $42, %r9d +; cmpl %r9d, %edi +; cmovzq %rsi, %rcx, %rcx +; cmovzq %rdx, %r8, %r8 +; movq %rcx, %rax +; movq %r8, %rdx +; movq %rbp, %rsp +; popq %rbp +; ret -;; Test for issue: https://github.com/bytecodealliance/wasmtime/issues/3963. function %f1(f32, i128, i128) -> i128 { block0(v0: f32, v1: i128, v2: i128): v3 = fcmp.f32 eq v0, v0 @@ -36,22 +30,17 @@ block0(v0: f32, v1: i128, v2: i128): return v4 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 
12) -; Inst 0: pushq %rbp -; Inst 1: movq %rsp, %rbp -; Inst 2: ucomiss %xmm0, %xmm0 -; Inst 3: cmovnzq %rdx, %rdi -; Inst 4: cmovpq %rdx, %rdi -; Inst 5: cmovnzq %rcx, %rsi -; Inst 6: cmovpq %rcx, %rsi -; Inst 7: movq %rdi, %rax -; Inst 8: movq %rsi, %rdx -; Inst 9: movq %rbp, %rsp -; Inst 10: popq %rbp -; Inst 11: ret -; }} +; pushq %rbp +; movq %rsp, %rbp +; block0: +; ucomiss %xmm0, %xmm0 +; cmovnzq %rdx, %rdi, %rdi +; cmovpq %rdx, %rdi, %rdi +; cmovnzq %rcx, %rsi, %rsi +; cmovpq %rcx, %rsi, %rsi +; movq %rdi, %rax +; movq %rsi, %rdx +; movq %rbp, %rsp +; popq %rbp +; ret diff --git a/cranelift/filetests/filetests/isa/x64/simd-bitwise-compile.clif b/cranelift/filetests/filetests/isa/x64/simd-bitwise-compile.clif index b59f8ddf45..7112afa311 100644 --- a/cranelift/filetests/filetests/isa/x64/simd-bitwise-compile.clif +++ b/cranelift/filetests/filetests/isa/x64/simd-bitwise-compile.clif @@ -8,18 +8,13 @@ block0(v0: f32x4, v1: f32x4): return v2 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 6) -; Inst 0: pushq %rbp -; Inst 1: movq %rsp, %rbp -; Inst 2: andps %xmm1, %xmm0 -; Inst 3: movq %rbp, %rsp -; Inst 4: popq %rbp -; Inst 5: ret -; }} +; pushq %rbp +; movq %rsp, %rbp +; block0: +; andps %xmm0, %xmm0, %xmm1 +; movq %rbp, %rsp +; popq %rbp +; ret function %band_f64x2(f64x2, f64x2) -> f64x2 { block0(v0: f64x2, v1: f64x2): @@ -27,18 +22,13 @@ block0(v0: f64x2, v1: f64x2): return v2 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 6) -; Inst 0: pushq %rbp -; Inst 1: movq %rsp, %rbp -; Inst 2: andpd %xmm1, %xmm0 -; Inst 3: movq %rbp, %rsp -; Inst 4: popq %rbp -; Inst 5: ret -; }} +; pushq %rbp +; movq %rsp, %rbp +; block0: +; andpd %xmm0, %xmm0, %xmm1 +; movq %rbp, %rsp +; popq %rbp +; ret function %band_i32x4(i32x4, i32x4) -> i32x4 { block0(v0: i32x4, v1: i32x4): @@ -46,18 +36,13 @@ block0(v0: i32x4, v1: i32x4): return v2 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 6) -; Inst 0: pushq %rbp -; Inst 1: movq %rsp, %rbp -; Inst 2: pand %xmm1, %xmm0 -; Inst 3: movq %rbp, %rsp -; Inst 4: popq %rbp -; Inst 5: ret -; }} +; pushq %rbp +; movq %rsp, %rbp +; block0: +; pand %xmm0, %xmm0, %xmm1 +; movq %rbp, %rsp +; popq %rbp +; ret function %bor_f32x4(f32x4, f32x4) -> f32x4 { block0(v0: f32x4, v1: f32x4): @@ -65,18 +50,13 @@ block0(v0: f32x4, v1: f32x4): return v2 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 6) -; Inst 0: pushq %rbp -; Inst 1: movq %rsp, %rbp -; Inst 2: orps %xmm1, %xmm0 -; Inst 3: movq %rbp, %rsp -; Inst 4: popq %rbp -; Inst 5: ret -; }} +; pushq %rbp +; movq %rsp, %rbp +; block0: +; orps %xmm0, %xmm0, %xmm1 +; movq %rbp, %rsp +; popq %rbp +; ret function %bor_f64x2(f64x2, f64x2) -> f64x2 { block0(v0: f64x2, v1: f64x2): @@ -84,18 +64,13 @@ block0(v0: f64x2, v1: f64x2): return v2 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 
6) -; Inst 0: pushq %rbp -; Inst 1: movq %rsp, %rbp -; Inst 2: orpd %xmm1, %xmm0 -; Inst 3: movq %rbp, %rsp -; Inst 4: popq %rbp -; Inst 5: ret -; }} +; pushq %rbp +; movq %rsp, %rbp +; block0: +; orpd %xmm0, %xmm0, %xmm1 +; movq %rbp, %rsp +; popq %rbp +; ret function %bor_i32x4(i32x4, i32x4) -> i32x4 { block0(v0: i32x4, v1: i32x4): @@ -103,18 +78,13 @@ block0(v0: i32x4, v1: i32x4): return v2 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 6) -; Inst 0: pushq %rbp -; Inst 1: movq %rsp, %rbp -; Inst 2: por %xmm1, %xmm0 -; Inst 3: movq %rbp, %rsp -; Inst 4: popq %rbp -; Inst 5: ret -; }} +; pushq %rbp +; movq %rsp, %rbp +; block0: +; por %xmm0, %xmm0, %xmm1 +; movq %rbp, %rsp +; popq %rbp +; ret function %bxor_f32x4(f32x4, f32x4) -> f32x4 { block0(v0: f32x4, v1: f32x4): @@ -122,18 +92,13 @@ block0(v0: f32x4, v1: f32x4): return v2 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 6) -; Inst 0: pushq %rbp -; Inst 1: movq %rsp, %rbp -; Inst 2: xorps %xmm1, %xmm0 -; Inst 3: movq %rbp, %rsp -; Inst 4: popq %rbp -; Inst 5: ret -; }} +; pushq %rbp +; movq %rsp, %rbp +; block0: +; xorps %xmm0, %xmm0, %xmm1 +; movq %rbp, %rsp +; popq %rbp +; ret function %bxor_f64x2(f64x2, f64x2) -> f64x2 { block0(v0: f64x2, v1: f64x2): @@ -141,18 +106,13 @@ block0(v0: f64x2, v1: f64x2): return v2 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 6) -; Inst 0: pushq %rbp -; Inst 1: movq %rsp, %rbp -; Inst 2: xorpd %xmm1, %xmm0 -; Inst 3: movq %rbp, %rsp -; Inst 4: popq %rbp -; Inst 5: ret -; }} +; pushq %rbp +; movq %rsp, %rbp +; block0: +; xorpd %xmm0, %xmm0, %xmm1 +; movq %rbp, %rsp +; popq %rbp +; ret function %bxor_i32x4(i32x4, i32x4) -> i32x4 { block0(v0: i32x4, v1: i32x4): @@ -160,18 +120,13 @@ block0(v0: i32x4, v1: i32x4): return v2 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 6) -; Inst 0: pushq %rbp -; Inst 1: movq %rsp, %rbp -; Inst 2: pxor %xmm1, %xmm0 -; Inst 3: movq %rbp, %rsp -; Inst 4: popq %rbp -; Inst 5: ret -; }} +; pushq %rbp +; movq %rsp, %rbp +; block0: +; pxor %xmm0, %xmm0, %xmm1 +; movq %rbp, %rsp +; popq %rbp +; ret function %bitselect_i16x8() -> i16x8 { block0: @@ -182,23 +137,18 @@ block0: return v3 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 11) -; Inst 0: pushq %rbp -; Inst 1: movq %rsp, %rbp -; Inst 2: load_const VCodeConstant(0), %xmm0 -; Inst 3: load_const VCodeConstant(0), %xmm1 -; Inst 4: load_const VCodeConstant(0), %xmm2 -; Inst 5: pand %xmm0, %xmm1 -; Inst 6: pandn %xmm2, %xmm0 -; Inst 7: por %xmm1, %xmm0 -; Inst 8: movq %rbp, %rsp -; Inst 9: popq %rbp -; Inst 10: ret -; }} +; pushq %rbp +; movq %rsp, %rbp +; block0: +; load_const VCodeConstant(0), %xmm0 +; load_const VCodeConstant(0), %xmm5 +; load_const VCodeConstant(0), %xmm4 +; pand %xmm5, %xmm5, %xmm0 +; pandn %xmm0, %xmm0, %xmm4 +; por %xmm0, %xmm0, %xmm5 +; movq %rbp, %rsp +; popq %rbp +; ret function %vselect_i16x8(b16x8, i16x8, i16x8) -> i16x8 { block0(v0: b16x8, v1: i16x8, v2: i16x8): @@ -206,19 +156,14 @@ block0(v0: b16x8, v1: i16x8, v2: i16x8): return v3 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 
7) -; Inst 0: pushq %rbp -; Inst 1: movq %rsp, %rbp -; Inst 2: pblendvb %xmm1, %xmm2 -; Inst 3: movdqa %xmm2, %xmm0 -; Inst 4: movq %rbp, %rsp -; Inst 5: popq %rbp -; Inst 6: ret -; }} +; pushq %rbp +; movq %rsp, %rbp +; block0: +; pblendvb %xmm2, %xmm2, %xmm1 +; movdqa %xmm2, %xmm0 +; movq %rbp, %rsp +; popq %rbp +; ret function %vselect_f32x4(b32x4, f32x4, f32x4) -> f32x4 { block0(v0: b32x4, v1: f32x4, v2: f32x4): @@ -226,19 +171,14 @@ block0(v0: b32x4, v1: f32x4, v2: f32x4): return v3 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 7) -; Inst 0: pushq %rbp -; Inst 1: movq %rsp, %rbp -; Inst 2: blendvps %xmm1, %xmm2 -; Inst 3: movdqa %xmm2, %xmm0 -; Inst 4: movq %rbp, %rsp -; Inst 5: popq %rbp -; Inst 6: ret -; }} +; pushq %rbp +; movq %rsp, %rbp +; block0: +; blendvps %xmm2, %xmm2, %xmm1 +; movdqa %xmm2, %xmm0 +; movq %rbp, %rsp +; popq %rbp +; ret function %vselect_f64x2(b64x2, f64x2, f64x2) -> f64x2 { block0(v0: b64x2, v1: f64x2, v2: f64x2): @@ -246,19 +186,14 @@ block0(v0: b64x2, v1: f64x2, v2: f64x2): return v3 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 7) -; Inst 0: pushq %rbp -; Inst 1: movq %rsp, %rbp -; Inst 2: blendvpd %xmm1, %xmm2 -; Inst 3: movdqa %xmm2, %xmm0 -; Inst 4: movq %rbp, %rsp -; Inst 5: popq %rbp -; Inst 6: ret -; }} +; pushq %rbp +; movq %rsp, %rbp +; block0: +; blendvpd %xmm2, %xmm2, %xmm1 +; movdqa %xmm2, %xmm0 +; movq %rbp, %rsp +; popq %rbp +; ret function %ishl_i8x16(i32) -> i8x16 { block0(v0: i32): @@ -267,24 +202,19 @@ block0(v0: i32): return v2 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 12) -; Inst 0: pushq %rbp -; Inst 1: movq %rsp, %rbp -; Inst 2: load_const VCodeConstant(1), %xmm0 -; Inst 3: movd %edi, %xmm1 -; Inst 4: psllw %xmm1, %xmm0 -; Inst 5: lea const(VCodeConstant(0)), %rsi -; Inst 6: shlq $4, %rdi -; Inst 7: movdqu 0(%rsi,%rdi,1), %xmm1 -; Inst 8: pand %xmm1, %xmm0 -; Inst 9: movq %rbp, %rsp -; Inst 10: popq %rbp -; Inst 11: ret -; }} +; pushq %rbp +; movq %rsp, %rbp +; block0: +; load_const VCodeConstant(1), %xmm0 +; movd %edi, %xmm5 +; psllw %xmm0, %xmm5, %xmm0 +; lea const(VCodeConstant(0)), %rax +; shlq $4, %rdi, %rdi +; movdqu 0(%rax,%rdi,1), %xmm13 +; pand %xmm0, %xmm0, %xmm13 +; movq %rbp, %rsp +; popq %rbp +; ret function %ushr_i8x16_imm() -> i8x16 { block0: @@ -294,21 +224,16 @@ block0: return v2 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 9) -; Inst 0: pushq %rbp -; Inst 1: movq %rsp, %rbp -; Inst 2: load_const VCodeConstant(1), %xmm0 -; Inst 3: psrlw $1, %xmm0 -; Inst 4: movdqu const(VCodeConstant(0)), %xmm1 -; Inst 5: pand %xmm1, %xmm0 -; Inst 6: movq %rbp, %rsp -; Inst 7: popq %rbp -; Inst 8: ret -; }} +; pushq %rbp +; movq %rsp, %rbp +; block0: +; load_const VCodeConstant(1), %xmm0 +; psrlw %xmm0, $1, %xmm0 +; movdqu const(VCodeConstant(0)), %xmm5 +; pand %xmm0, %xmm0, %xmm5 +; movq %rbp, %rsp +; popq %rbp +; ret function %sshr_i8x16(i32) -> i8x16 { block0(v0: i32): @@ -317,27 +242,21 @@ block0(v0: i32): return v2 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 
15) -; Inst 0: pushq %rbp -; Inst 1: movq %rsp, %rbp -; Inst 2: load_const VCodeConstant(0), %xmm2 -; Inst 3: movdqa %xmm2, %xmm0 -; Inst 4: punpcklbw %xmm2, %xmm0 -; Inst 5: movdqa %xmm2, %xmm1 -; Inst 6: punpckhbw %xmm2, %xmm1 -; Inst 7: addl $8, %edi -; Inst 8: movd %edi, %xmm2 -; Inst 9: psraw %xmm2, %xmm0 -; Inst 10: psraw %xmm2, %xmm1 -; Inst 11: packsswb %xmm1, %xmm0 -; Inst 12: movq %rbp, %rsp -; Inst 13: popq %rbp -; Inst 14: ret -; }} +; pushq %rbp +; movq %rsp, %rbp +; block0: +; load_const VCodeConstant(0), %xmm9 +; movdqa %xmm9, %xmm0 +; punpcklbw %xmm0, %xmm0, %xmm9 +; punpckhbw %xmm9, %xmm9, %xmm9 +; addl %edi, $8, %edi +; movd %edi, %xmm11 +; psraw %xmm0, %xmm11, %xmm0 +; psraw %xmm9, %xmm11, %xmm9 +; packsswb %xmm0, %xmm0, %xmm9 +; movq %rbp, %rsp +; popq %rbp +; ret function %sshr_i8x16_imm(i8x16, i32) -> i8x16 { block0(v0: i8x16, v1: i32): @@ -345,25 +264,23 @@ block0(v0: i8x16, v1: i32): return v2 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 13) -; Inst 0: pushq %rbp -; Inst 1: movq %rsp, %rbp -; Inst 2: movdqa %xmm0, %xmm1 -; Inst 3: punpcklbw %xmm0, %xmm1 -; Inst 4: movdqa %xmm0, %xmm2 -; Inst 5: punpckhbw %xmm0, %xmm2 -; Inst 6: psraw $11, %xmm1 -; Inst 7: psraw $11, %xmm2 -; Inst 8: packsswb %xmm2, %xmm1 -; Inst 9: movdqa %xmm1, %xmm0 -; Inst 10: movq %rbp, %rsp -; Inst 11: popq %rbp -; Inst 12: ret -; }} +; pushq %rbp +; movq %rsp, %rbp +; block0: +; movdqa %xmm0, %xmm9 +; punpcklbw %xmm9, %xmm9, %xmm0 +; punpckhbw %xmm0, %xmm0, %xmm0 +; movdqa %xmm9, %xmm12 +; psraw %xmm12, $11, %xmm12 +; movdqa %xmm12, %xmm9 +; psraw %xmm0, $11, %xmm0 +; movdqa %xmm9, %xmm1 +; packsswb %xmm1, %xmm1, %xmm0 +; movdqa %xmm1, %xmm9 +; movdqa %xmm9, %xmm0 +; movq %rbp, %rsp +; popq %rbp +; ret function %sshr_i64x2(i64x2, i32) -> i64x2 { block0(v0: i64x2, v1: i32): @@ -371,24 +288,19 @@ block0(v0: i64x2, v1: i32): return v2 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 14) -; Inst 0: pushq %rbp -; Inst 1: movq %rsp, %rbp -; Inst 2: pextrd.w $0, %xmm0, %rsi -; Inst 3: pextrd.w $1, %xmm0, %rax -; Inst 4: movq %rdi, %rcx -; Inst 5: sarq %cl, %rsi -; Inst 6: movq %rdi, %rcx -; Inst 7: sarq %cl, %rax -; Inst 8: uninit %xmm0 -; Inst 9: pinsrd.w $0, %rsi, %xmm0 -; Inst 10: pinsrd.w $1, %rax, %xmm0 -; Inst 11: movq %rbp, %rsp -; Inst 12: popq %rbp -; Inst 13: ret -; }} +; pushq %rbp +; movq %rsp, %rbp +; block0: +; movq %rdi, %rax +; pextrd.w $0, %xmm0, %r11 +; pextrd.w $1, %xmm0, %rdi +; movq %rax, %rcx +; sarq %cl, %r11, %r11 +; sarq %cl, %rdi, %rdi +; uninit %xmm0 +; pinsrd.w $0, %xmm0, %r11, %xmm0 +; pinsrd.w $1, %xmm0, %rdi, %xmm0 +; movq %rbp, %rsp +; popq %rbp +; ret diff --git a/cranelift/filetests/filetests/isa/x64/simd-comparison-legalize.clif b/cranelift/filetests/filetests/isa/x64/simd-comparison-legalize.clif index 58cd0db90e..6a99987e29 100644 --- a/cranelift/filetests/filetests/isa/x64/simd-comparison-legalize.clif +++ b/cranelift/filetests/filetests/isa/x64/simd-comparison-legalize.clif @@ -8,20 +8,15 @@ block0(v0: i32x4, v1: i32x4): return v2 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 
8) -; Inst 0: pushq %rbp -; Inst 1: movq %rsp, %rbp -; Inst 2: pcmpeqd %xmm1, %xmm0 -; Inst 3: pcmpeqd %xmm1, %xmm1 -; Inst 4: pxor %xmm1, %xmm0 -; Inst 5: movq %rbp, %rsp -; Inst 6: popq %rbp -; Inst 7: ret -; }} +; pushq %rbp +; movq %rsp, %rbp +; block0: +; pcmpeqd %xmm0, %xmm0, %xmm1 +; pcmpeqd %xmm7, %xmm7, %xmm7 +; pxor %xmm0, %xmm0, %xmm7 +; movq %rbp, %rsp +; popq %rbp +; ret function %icmp_ugt_i32x4(i32x4, i32x4) -> b32x4 { block0(v0: i32x4, v1: i32x4): @@ -29,21 +24,16 @@ block0(v0: i32x4, v1: i32x4): return v2 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 9) -; Inst 0: pushq %rbp -; Inst 1: movq %rsp, %rbp -; Inst 2: pmaxud %xmm1, %xmm0 -; Inst 3: pcmpeqd %xmm1, %xmm0 -; Inst 4: pcmpeqd %xmm1, %xmm1 -; Inst 5: pxor %xmm1, %xmm0 -; Inst 6: movq %rbp, %rsp -; Inst 7: popq %rbp -; Inst 8: ret -; }} +; pushq %rbp +; movq %rsp, %rbp +; block0: +; pmaxud %xmm0, %xmm0, %xmm1 +; pcmpeqd %xmm0, %xmm0, %xmm1 +; pcmpeqd %xmm9, %xmm9, %xmm9 +; pxor %xmm0, %xmm0, %xmm9 +; movq %rbp, %rsp +; popq %rbp +; ret function %icmp_sge_i16x8(i16x8, i16x8) -> b16x8 { block0(v0: i16x8, v1: i16x8): @@ -51,21 +41,15 @@ block0(v0: i16x8, v1: i16x8): return v2 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 9) -; Inst 0: pushq %rbp -; Inst 1: movq %rsp, %rbp -; Inst 2: movdqa %xmm1, %xmm2 -; Inst 3: movdqa %xmm0, %xmm1 -; Inst 4: pmaxsw %xmm2, %xmm1 -; Inst 5: pcmpeqw %xmm1, %xmm0 -; Inst 6: movq %rbp, %rsp -; Inst 7: popq %rbp -; Inst 8: ret -; }} +; pushq %rbp +; movq %rsp, %rbp +; block0: +; movdqa %xmm0, %xmm5 +; pmaxsw %xmm5, %xmm5, %xmm1 +; pcmpeqw %xmm0, %xmm0, %xmm5 +; movq %rbp, %rsp +; popq %rbp +; ret function %icmp_uge_i8x16(i8x16, i8x16) -> b8x16 { block0(v0: i8x16, v1: i8x16): @@ -73,19 +57,13 @@ block0(v0: i8x16, v1: i8x16): return v2 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 9) -; Inst 0: pushq %rbp -; Inst 1: movq %rsp, %rbp -; Inst 2: movdqa %xmm1, %xmm2 -; Inst 3: movdqa %xmm0, %xmm1 -; Inst 4: pmaxub %xmm2, %xmm1 -; Inst 5: pcmpeqb %xmm1, %xmm0 -; Inst 6: movq %rbp, %rsp -; Inst 7: popq %rbp -; Inst 8: ret -; }} +; pushq %rbp +; movq %rsp, %rbp +; block0: +; movdqa %xmm0, %xmm5 +; pmaxub %xmm5, %xmm5, %xmm1 +; pcmpeqb %xmm0, %xmm0, %xmm5 +; movq %rbp, %rsp +; popq %rbp +; ret diff --git a/cranelift/filetests/filetests/isa/x64/simd-lane-access-compile.clif b/cranelift/filetests/filetests/isa/x64/simd-lane-access-compile.clif index e2cdd4dbe1..9d4b7b572e 100644 --- a/cranelift/filetests/filetests/isa/x64/simd-lane-access-compile.clif +++ b/cranelift/filetests/filetests/isa/x64/simd-lane-access-compile.clif @@ -12,24 +12,19 @@ block0: return v2 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 
12) -; Inst 0: pushq %rbp -; Inst 1: movq %rsp, %rbp -; Inst 2: load_const VCodeConstant(3), %xmm1 -; Inst 3: load_const VCodeConstant(2), %xmm0 -; Inst 4: load_const VCodeConstant(0), %xmm2 -; Inst 5: pshufb %xmm2, %xmm1 -; Inst 6: load_const VCodeConstant(1), %xmm2 -; Inst 7: pshufb %xmm2, %xmm0 -; Inst 8: orps %xmm1, %xmm0 -; Inst 9: movq %rbp, %rsp -; Inst 10: popq %rbp -; Inst 11: ret -; }} +; pushq %rbp +; movq %rsp, %rbp +; block0: +; load_const VCodeConstant(3), %xmm1 +; load_const VCodeConstant(2), %xmm0 +; load_const VCodeConstant(0), %xmm9 +; pshufb %xmm1, %xmm1, %xmm9 +; load_const VCodeConstant(1), %xmm12 +; pshufb %xmm0, %xmm0, %xmm12 +; orps %xmm0, %xmm0, %xmm1 +; movq %rbp, %rsp +; popq %rbp +; ret function %shuffle_same_ssa_value() -> i8x16 { block0: @@ -38,20 +33,15 @@ block0: return v2 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 8) -; Inst 0: pushq %rbp -; Inst 1: movq %rsp, %rbp -; Inst 2: load_const VCodeConstant(1), %xmm0 -; Inst 3: load_const VCodeConstant(0), %xmm1 -; Inst 4: pshufb %xmm1, %xmm0 -; Inst 5: movq %rbp, %rsp -; Inst 6: popq %rbp -; Inst 7: ret -; }} +; pushq %rbp +; movq %rsp, %rbp +; block0: +; load_const VCodeConstant(1), %xmm0 +; load_const VCodeConstant(0), %xmm5 +; pshufb %xmm0, %xmm0, %xmm5 +; movq %rbp, %rsp +; popq %rbp +; ret function %swizzle() -> i8x16 { block0: @@ -61,23 +51,17 @@ block0: return v2 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 11) -; Inst 0: pushq %rbp -; Inst 1: movq %rsp, %rbp -; Inst 2: load_const VCodeConstant(1), %xmm1 -; Inst 3: load_const VCodeConstant(1), %xmm0 -; Inst 4: load_const VCodeConstant(0), %xmm2 -; Inst 5: paddusb %xmm2, %xmm0 -; Inst 6: pshufb %xmm0, %xmm1 -; Inst 7: movdqa %xmm1, %xmm0 -; Inst 8: movq %rbp, %rsp -; Inst 9: popq %rbp -; Inst 10: ret -; }} +; pushq %rbp +; movq %rsp, %rbp +; block0: +; load_const VCodeConstant(1), %xmm0 +; load_const VCodeConstant(1), %xmm2 +; load_const VCodeConstant(0), %xmm7 +; paddusb %xmm2, %xmm2, %xmm7 +; pshufb %xmm0, %xmm0, %xmm2 +; movq %rbp, %rsp +; popq %rbp +; ret function %splat_i8(i8) -> i8x16 { block0(v0: i8): @@ -85,21 +69,16 @@ block0(v0: i8): return v1 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 9) -; Inst 0: pushq %rbp -; Inst 1: movq %rsp, %rbp -; Inst 2: uninit %xmm0 -; Inst 3: pinsrb $0, %rdi, %xmm0 -; Inst 4: pxor %xmm1, %xmm1 -; Inst 5: pshufb %xmm1, %xmm0 -; Inst 6: movq %rbp, %rsp -; Inst 7: popq %rbp -; Inst 8: ret -; }} +; pushq %rbp +; movq %rsp, %rbp +; block0: +; uninit %xmm0 +; pinsrb $0, %xmm0, %rdi, %xmm0 +; pxor %xmm6, %xmm6, %xmm6 +; pshufb %xmm0, %xmm0, %xmm6 +; movq %rbp, %rsp +; popq %rbp +; ret function %splat_b16() -> b16x8 { block0: @@ -108,22 +87,17 @@ block0: return v1 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 
10) -; Inst 0: pushq %rbp -; Inst 1: movq %rsp, %rbp -; Inst 2: movl $65535, %esi -; Inst 3: uninit %xmm0 -; Inst 4: pinsrw $0, %rsi, %xmm0 -; Inst 5: pinsrw $1, %rsi, %xmm0 -; Inst 6: pshufd $0, %xmm0, %xmm0 -; Inst 7: movq %rbp, %rsp -; Inst 8: popq %rbp -; Inst 9: ret -; }} +; pushq %rbp +; movq %rsp, %rbp +; block0: +; movl $65535, %eax +; uninit %xmm0 +; pinsrw $0, %xmm0, %rax, %xmm0 +; pinsrw $1, %xmm0, %rax, %xmm0 +; pshufd $0, %xmm0, %xmm0 +; movq %rbp, %rsp +; popq %rbp +; ret function %splat_i32(i32) -> i32x4 { block0(v0: i32): @@ -131,20 +105,15 @@ block0(v0: i32): return v1 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 8) -; Inst 0: pushq %rbp -; Inst 1: movq %rsp, %rbp -; Inst 2: uninit %xmm0 -; Inst 3: pinsrd $0, %rdi, %xmm0 -; Inst 4: pshufd $0, %xmm0, %xmm0 -; Inst 5: movq %rbp, %rsp -; Inst 6: popq %rbp -; Inst 7: ret -; }} +; pushq %rbp +; movq %rsp, %rbp +; block0: +; uninit %xmm0 +; pinsrd $0, %xmm0, %rdi, %xmm0 +; pshufd $0, %xmm0, %xmm0 +; movq %rbp, %rsp +; popq %rbp +; ret function %splat_f64(f64) -> f64x2 { block0(v0: f64): @@ -152,21 +121,17 @@ block0(v0: f64): return v1 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 9) -; Inst 0: pushq %rbp -; Inst 1: movq %rsp, %rbp -; Inst 2: uninit %xmm1 -; Inst 3: movsd %xmm0, %xmm1 -; Inst 4: movlhps %xmm0, %xmm1 -; Inst 5: movdqa %xmm1, %xmm0 -; Inst 6: movq %rbp, %rsp -; Inst 7: popq %rbp -; Inst 8: ret -; }} +; pushq %rbp +; movq %rsp, %rbp +; block0: +; movdqa %xmm0, %xmm4 +; uninit %xmm0 +; movdqa %xmm4, %xmm5 +; movsd %xmm0, %xmm0, %xmm5 +; movlhps %xmm0, %xmm0, %xmm5 +; movq %rbp, %rsp +; popq %rbp +; ret function %load32_zero_coalesced(i64) -> i32x4 { block0(v0: i64): @@ -175,18 +140,13 @@ block0(v0: i64): return v2 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 6) -; Inst 0: pushq %rbp -; Inst 1: movq %rsp, %rbp -; Inst 2: movss 0(%rdi), %xmm0 -; Inst 3: movq %rbp, %rsp -; Inst 4: popq %rbp -; Inst 5: ret -; }} +; pushq %rbp +; movq %rsp, %rbp +; block0: +; movss 0(%rdi), %xmm0 +; movq %rbp, %rsp +; popq %rbp +; ret function %load32_zero_int(i32) -> i32x4 { block0(v0: i32): @@ -194,18 +154,13 @@ block0(v0: i32): return v1 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 6) -; Inst 0: pushq %rbp -; Inst 1: movq %rsp, %rbp -; Inst 2: movd %edi, %xmm0 -; Inst 3: movq %rbp, %rsp -; Inst 4: popq %rbp -; Inst 5: ret -; }} +; pushq %rbp +; movq %rsp, %rbp +; block0: +; movd %edi, %xmm0 +; movq %rbp, %rsp +; popq %rbp +; ret function %load32_zero_float(f32) -> f32x4 { block0(v0: f32): @@ -213,15 +168,10 @@ block0(v0: f32): return v1 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 
5) -; Inst 0: pushq %rbp -; Inst 1: movq %rsp, %rbp -; Inst 2: movq %rbp, %rsp -; Inst 3: popq %rbp -; Inst 4: ret -; }} +; pushq %rbp +; movq %rsp, %rbp +; block0: +; movq %rbp, %rsp +; popq %rbp +; ret diff --git a/cranelift/filetests/filetests/isa/x64/simd-logical-compile.clif b/cranelift/filetests/filetests/isa/x64/simd-logical-compile.clif index 72fc41e34c..42849a3987 100644 --- a/cranelift/filetests/filetests/isa/x64/simd-logical-compile.clif +++ b/cranelift/filetests/filetests/isa/x64/simd-logical-compile.clif @@ -8,19 +8,14 @@ block0(v0: b32x4): return v1 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 7) -; Inst 0: pushq %rbp -; Inst 1: movq %rsp, %rbp -; Inst 2: pcmpeqd %xmm1, %xmm1 -; Inst 3: pxor %xmm1, %xmm0 -; Inst 4: movq %rbp, %rsp -; Inst 5: popq %rbp -; Inst 6: ret -; }} +; pushq %rbp +; movq %rsp, %rbp +; block0: +; pcmpeqd %xmm3, %xmm3, %xmm3 +; pxor %xmm0, %xmm0, %xmm3 +; movq %rbp, %rsp +; popq %rbp +; ret function %vany_true_b32x4(b32x4) -> b1 { block0(v0: b32x4): @@ -28,20 +23,14 @@ block0(v0: b32x4): return v1 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 8) -; Inst 0: pushq %rbp -; Inst 1: movq %rsp, %rbp -; Inst 2: ptest %xmm0, %xmm0 -; Inst 3: setnz %sil -; Inst 4: movq %rsi, %rax -; Inst 5: movq %rbp, %rsp -; Inst 6: popq %rbp -; Inst 7: ret -; }} +; pushq %rbp +; movq %rsp, %rbp +; block0: +; ptest %xmm0, %xmm0 +; setnz %al +; movq %rbp, %rsp +; popq %rbp +; ret function %vall_true_i64x2(i64x2) -> b1 { block0(v0: i64x2): @@ -49,20 +38,14 @@ block0(v0: i64x2): return v1 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 10) -; Inst 0: pushq %rbp -; Inst 1: movq %rsp, %rbp -; Inst 2: pxor %xmm1, %xmm1 -; Inst 3: pcmpeqq %xmm0, %xmm1 -; Inst 4: ptest %xmm1, %xmm1 -; Inst 5: setz %sil -; Inst 6: movq %rsi, %rax -; Inst 7: movq %rbp, %rsp -; Inst 8: popq %rbp -; Inst 9: ret -; }} +; pushq %rbp +; movq %rsp, %rbp +; block0: +; pxor %xmm4, %xmm4, %xmm4 +; pcmpeqq %xmm4, %xmm4, %xmm0 +; ptest %xmm4, %xmm4 +; setz %al +; movq %rbp, %rsp +; popq %rbp +; ret diff --git a/cranelift/filetests/filetests/isa/x64/store-stack-full-width-i32.clif b/cranelift/filetests/filetests/isa/x64/store-stack-full-width-i32.clif deleted file mode 100644 index c20f816fc2..0000000000 --- a/cranelift/filetests/filetests/isa/x64/store-stack-full-width-i32.clif +++ /dev/null @@ -1,127 +0,0 @@ -test compile -target x86_64 - -;; The goal of this test is to ensure that stack spills of an integer value, -;; which width is less than the machine word's size, cause the full word to be -;; stored, and not only the lower bits. - -;; Because of unsigned extensions which can be transformed into simple moves, -;; the source vreg of the extension operation can be coalesced with its -;; destination vreg, and if it happens to be spill, then the reload may use a -;; reload of a different, larger size. - -function %f0(i32, i32, i32) -> i64 { - fn0 = %g(i32) -> i64 - -; check: pushq %rbp -; nextln: movq %rsp, %rbp -; nextln: subq $$64, %rsp - -;; Stash all the callee-saved registers. - -; nextln: movq %r12, 16(%rsp) -; nextln: movq %r13, 24(%rsp) -; nextln: movq %r14, 32(%rsp) -; nextln: movq %rbx, 40(%rsp) -; nextln: movq %r15, 48(%rsp) - -block0(v0: i32, v1: i32, v2: i32): - ;; First, create enough virtual registers so that the call instructions - ;; causes at least one of them to be spilled onto the stack. 
- - v3 = iadd.i32 v0, v1 - v4 = iadd.i32 v1, v2 - v5 = iadd.i32 v0, v2 - v6 = iadd.i32 v3, v0 - v7 = iadd.i32 v4, v0 - v8 = iadd.i32 v5, v0 - -; nextln: movq %rdi, %r12 -; nextln: addl %esi, %r12d -; nextln: movq %rsi, %r13 -; nextln: addl %edx, %r13d -; nextln: movq %rdi, %r14 -; nextln: addl %edx, %r14d -; nextln: movq %r12, %rbx -; nextln: addl %edi, %ebx -; nextln: movq %r13, %r15 -; nextln: addl %edi, %r15d -; nextln: movq %r14, %rsi - -;; This should be movq below, not movl. -; nextln: movq %rsi, rsp(0 + virtual offset) - -; nextln: movq rsp(0 + virtual offset), %rsi -; nextln: addl %edi, %esi - - ;; Put an effectful instruction so that the live-ranges of the adds and - ;; uextends are split here, and to prevent the uextend to be emitted - ;; before the call. This will effectively causing the above i32 to be - ;; spilled as an i32, and not a full i64. - - v300 = call fn0(v0) - -;; This should be movq below, not movl. -; nextln: movq %rsi, rsp(0 + virtual offset) - -; nextln: load_ext_name %g+0, %rsi -; nextln: call *%rsi - - v31 = uextend.i64 v3 - v41 = uextend.i64 v4 - v51 = uextend.i64 v5 - v61 = uextend.i64 v6 - v71 = uextend.i64 v7 - v81 = uextend.i64 v8 - - ;; None of the uextends are generated here yet. - - ;; At this point, I'd expect that this second call below would be not - ;; necessary, but if it is removed, the uextend is applied before the call, - ;; and the i64 is spilled (then reloaded), causing the bug to not appear. So - ;; an additional call it is! - - v100 = call fn0(v3) - -; nextln: movq %r12, %rsi -; nextln: movq %rsi, rsp(8 + virtual offset) -; nextln: nop len=0 -; nextln: movq %r12, %rdi -; nextln: load_ext_name %g+0, %rsi -; nextln: call *%rsi - - ;; Cause reloads of all the values. Most are in registers, but one of them - ;; is on the stack. Make sure they're all used in the final computation. - - v101 = iadd.i64 v100, v31 - v102 = iadd.i64 v101, v41 - v103 = iadd.i64 v102, v51 - v104 = iadd.i64 v103, v61 - v105 = iadd.i64 v104, v71 - v200 = iadd.i64 v105, v81 - -; nextln: movq %rax, %rsi -; nextln: movq rsp(8 + virtual offset), %rdi -; nextln: addq %rdi, %rsi -; nextln: addq %r13, %rsi -; nextln: addq %r14, %rsi -; nextln: addq %rbx, %rsi -; nextln: addq %r15, %rsi - -;; The reload operates on a full word, so uses movq. -; nextln: movq rsp(0 + virtual offset), %rdi - -; nextln: addq %rdi, %rsi -; nextln: movq %rsi, %rax -; nextln: movq 16(%rsp), %r12 -; nextln: movq 24(%rsp), %r13 -; nextln: movq 32(%rsp), %r14 -; nextln: movq 40(%rsp), %rbx -; nextln: movq 48(%rsp), %r15 -; nextln: addq $$64, %rsp - - return v200 -; nextln: movq %rbp, %rsp -; nextln: popq %rbp -; nextln: ret -} diff --git a/cranelift/filetests/filetests/isa/x64/struct-arg.clif b/cranelift/filetests/filetests/isa/x64/struct-arg.clif index b4094abf5c..9076630171 100644 --- a/cranelift/filetests/filetests/isa/x64/struct-arg.clif +++ b/cranelift/filetests/filetests/isa/x64/struct-arg.clif @@ -7,20 +7,14 @@ block0(v0: i64): return v1 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 
8) -; Inst 0: pushq %rbp -; Inst 1: movq %rsp, %rbp -; Inst 2: lea 16(%rbp), %rsi -; Inst 3: movzbq 0(%rsi), %rsi -; Inst 4: movq %rsi, %rax -; Inst 5: movq %rbp, %rsp -; Inst 6: popq %rbp -; Inst 7: ret -; }} +; pushq %rbp +; movq %rsp, %rbp +; block0: +; lea 16(%rbp), %rsi +; movzbq 0(%rsi), %rax +; movq %rbp, %rsp +; popq %rbp +; ret function u0:1(i64 sarg(64), i64) -> i8 system_v { block0(v0: i64, v1: i64): @@ -30,22 +24,16 @@ block0(v0: i64, v1: i64): return v4 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 10) -; Inst 0: pushq %rbp -; Inst 1: movq %rsp, %rbp -; Inst 2: lea 16(%rbp), %rsi -; Inst 3: movzbq 0(%rdi), %rdi -; Inst 4: movzbq 0(%rsi), %rsi -; Inst 5: addl %esi, %edi -; Inst 6: movq %rdi, %rax -; Inst 7: movq %rbp, %rsp -; Inst 8: popq %rbp -; Inst 9: ret -; }} +; pushq %rbp +; movq %rsp, %rbp +; block0: +; lea 16(%rbp), %rsi +; movzbq 0(%rdi), %rax +; movzbq 0(%rsi), %r11 +; addl %eax, %r11d, %eax +; movq %rbp, %rsp +; popq %rbp +; ret function u0:2(i64) -> i8 system_v { fn1 = colocated u0:0(i64 sarg(64)) -> i8 system_v @@ -55,27 +43,23 @@ block0(v0: i64): return v1 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 15) -; Inst 0: pushq %rbp -; Inst 1: movq %rsp, %rbp -; Inst 2: movq %rdi, %rsi -; Inst 3: subq $64, %rsp -; Inst 4: virtual_sp_offset_adjust 64 -; Inst 5: lea 0(%rsp), %rdi -; Inst 6: movl $64, %edx -; Inst 7: load_ext_name %Memcpy+0, %rcx -; Inst 8: call *%rcx -; Inst 9: call User { namespace: 0, index: 0 } -; Inst 10: addq $64, %rsp -; Inst 11: virtual_sp_offset_adjust -64 -; Inst 12: movq %rbp, %rsp -; Inst 13: popq %rbp -; Inst 14: ret -; }} +; pushq %rbp +; movq %rsp, %rbp +; block0: +; movq %rdi, %r8 +; subq %rsp, $64, %rsp +; virtual_sp_offset_adjust 64 +; lea 0(%rsp), %rdi +; movq %r8, %rsi +; movl $64, %edx +; load_ext_name %Memcpy+0, %rcx +; call *%rcx +; call User { namespace: 0, index: 0 } +; addq %rsp, $64, %rsp +; virtual_sp_offset_adjust -64 +; movq %rbp, %rsp +; popq %rbp +; ret function u0:3(i64, i64) -> i8 system_v { fn1 = colocated u0:0(i64, i64 sarg(64)) -> i8 system_v @@ -85,32 +69,27 @@ block0(v0: i64, v1: i64): return v2 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 
20) -; Inst 0: pushq %rbp -; Inst 1: movq %rsp, %rbp -; Inst 2: subq $16, %rsp -; Inst 3: movq %r12, 0(%rsp) -; Inst 4: movq %rdi, %r12 -; Inst 5: subq $64, %rsp -; Inst 6: virtual_sp_offset_adjust 64 -; Inst 7: lea 0(%rsp), %rdi -; Inst 8: movl $64, %edx -; Inst 9: load_ext_name %Memcpy+0, %rcx -; Inst 10: call *%rcx -; Inst 11: movq %r12, %rdi -; Inst 12: call User { namespace: 0, index: 0 } -; Inst 13: addq $64, %rsp -; Inst 14: virtual_sp_offset_adjust -64 -; Inst 15: movq 0(%rsp), %r12 -; Inst 16: addq $16, %rsp -; Inst 17: movq %rbp, %rsp -; Inst 18: popq %rbp -; Inst 19: ret -; }} +; pushq %rbp +; movq %rsp, %rbp +; subq %rsp, $16, %rsp +; movq %r12, 0(%rsp) +; block0: +; movq %rdi, %r12 +; subq %rsp, $64, %rsp +; virtual_sp_offset_adjust 64 +; lea 0(%rsp), %rdi +; movl $64, %edx +; load_ext_name %Memcpy+0, %rcx +; call *%rcx +; movq %r12, %rdi +; call User { namespace: 0, index: 0 } +; addq %rsp, $64, %rsp +; virtual_sp_offset_adjust -64 +; movq 0(%rsp), %r12 +; addq %rsp, $16, %rsp +; movq %rbp, %rsp +; popq %rbp +; ret function u0:4(i64 sarg(128), i64 sarg(64)) -> i8 system_v { block0(v0: i64, v1: i64): @@ -120,23 +99,17 @@ block0(v0: i64, v1: i64): return v4 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 11) -; Inst 0: pushq %rbp -; Inst 1: movq %rsp, %rbp -; Inst 2: lea 16(%rbp), %rsi -; Inst 3: lea 144(%rbp), %rdi -; Inst 4: movzbq 0(%rsi), %rsi -; Inst 5: movzbq 0(%rdi), %rdi -; Inst 6: addl %edi, %esi -; Inst 7: movq %rsi, %rax -; Inst 8: movq %rbp, %rsp -; Inst 9: popq %rbp -; Inst 10: ret -; }} +; pushq %rbp +; movq %rsp, %rbp +; block0: +; lea 16(%rbp), %rsi +; lea 144(%rbp), %rdi +; movzbq 0(%rsi), %rax +; movzbq 0(%rdi), %r11 +; addl %eax, %r11d, %eax +; movq %rbp, %rsp +; popq %rbp +; ret function u0:5(i64, i64, i64) -> i8 system_v { fn1 = colocated u0:0(i64, i64 sarg(128), i64 sarg(64)) -> i8 system_v @@ -146,38 +119,33 @@ block0(v0: i64, v1: i64, v2: i64): return v3 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 
28) -; Inst 0: pushq %rbp -; Inst 1: movq %rsp, %rbp -; Inst 2: subq $16, %rsp -; Inst 3: movq %r12, 0(%rsp) -; Inst 4: movq %r13, 8(%rsp) -; Inst 5: movq %rdi, %r12 -; Inst 6: movq %rdx, %r13 -; Inst 7: subq $192, %rsp -; Inst 8: virtual_sp_offset_adjust 192 -; Inst 9: lea 0(%rsp), %rdi -; Inst 10: movl $128, %edx -; Inst 11: load_ext_name %Memcpy+0, %rcx -; Inst 12: call *%rcx -; Inst 13: lea 128(%rsp), %rdi -; Inst 14: movq %r13, %rsi -; Inst 15: movl $64, %edx -; Inst 16: load_ext_name %Memcpy+0, %rcx -; Inst 17: call *%rcx -; Inst 18: movq %r12, %rdi -; Inst 19: call User { namespace: 0, index: 0 } -; Inst 20: addq $192, %rsp -; Inst 21: virtual_sp_offset_adjust -192 -; Inst 22: movq 0(%rsp), %r12 -; Inst 23: movq 8(%rsp), %r13 -; Inst 24: addq $16, %rsp -; Inst 25: movq %rbp, %rsp -; Inst 26: popq %rbp -; Inst 27: ret -; }} +; pushq %rbp +; movq %rsp, %rbp +; subq %rsp, $16, %rsp +; movq %rbx, 0(%rsp) +; movq %r14, 8(%rsp) +; block0: +; movq %rdi, %r14 +; movq %rdx, %rbx +; subq %rsp, $192, %rsp +; virtual_sp_offset_adjust 192 +; lea 0(%rsp), %rdi +; movl $128, %edx +; load_ext_name %Memcpy+0, %rcx +; call *%rcx +; lea 128(%rsp), %rdi +; movq %rbx, %rsi +; movl $64, %edx +; load_ext_name %Memcpy+0, %rcx +; call *%rcx +; movq %r14, %rdi +; call User { namespace: 0, index: 0 } +; addq %rsp, $192, %rsp +; virtual_sp_offset_adjust -192 +; movq 0(%rsp), %rbx +; movq 8(%rsp), %r14 +; addq %rsp, $16, %rsp +; movq %rbp, %rsp +; popq %rbp +; ret diff --git a/cranelift/filetests/filetests/isa/x64/struct-ret.clif b/cranelift/filetests/filetests/isa/x64/struct-ret.clif index 0ecda5ec32..a131363569 100644 --- a/cranelift/filetests/filetests/isa/x64/struct-ret.clif +++ b/cranelift/filetests/filetests/isa/x64/struct-ret.clif @@ -8,18 +8,13 @@ block0(v0: i64): return } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 8) -; Inst 0: pushq %rbp -; Inst 1: movq %rsp, %rbp -; Inst 2: movq %rdi, %rax -; Inst 3: movl $42, %esi -; Inst 4: movq %rsi, 0(%rdi) -; Inst 5: movq %rbp, %rsp -; Inst 6: popq %rbp -; Inst 7: ret -; }} +; pushq %rbp +; movq %rsp, %rbp +; block0: +; movq %rdi, %rax +; movl $42, %edx +; movq %rdx, 0(%rdi) +; movq %rbp, %rsp +; popq %rbp +; ret diff --git a/cranelift/filetests/filetests/isa/x64/tls_elf.clif b/cranelift/filetests/filetests/isa/x64/tls_elf.clif index 5f9333772b..58b21308da 100644 --- a/cranelift/filetests/filetests/isa/x64/tls_elf.clif +++ b/cranelift/filetests/filetests/isa/x64/tls_elf.clif @@ -10,16 +10,11 @@ block0(v0: i32): return v1 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 6) -; Inst 0: pushq %rbp -; Inst 1: movq %rsp, %rbp -; Inst 2: elf_tls_get_addr User { namespace: 1, index: 0 } -; Inst 3: movq %rbp, %rsp -; Inst 4: popq %rbp -; Inst 5: ret -; }} +; pushq %rbp +; movq %rsp, %rbp +; block0: +; elf_tls_get_addr User { namespace: 1, index: 0 } +; movq %rbp, %rsp +; popq %rbp +; ret diff --git a/cranelift/filetests/filetests/isa/x64/uextend-elision.clif b/cranelift/filetests/filetests/isa/x64/uextend-elision.clif index e69d941c54..1f88ad6538 100644 --- a/cranelift/filetests/filetests/isa/x64/uextend-elision.clif +++ b/cranelift/filetests/filetests/isa/x64/uextend-elision.clif @@ -8,17 +8,12 @@ block0(v0: i32, v1: i32): return v3 } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 
7) -; Inst 0: pushq %rbp -; Inst 1: movq %rsp, %rbp -; Inst 2: addl %esi, %edi -; Inst 3: movq %rdi, %rax -; Inst 4: movq %rbp, %rsp -; Inst 5: popq %rbp -; Inst 6: ret -; }} +; pushq %rbp +; movq %rsp, %rbp +; block0: +; addl %edi, %esi, %edi +; movq %rdi, %rax +; movq %rbp, %rsp +; popq %rbp +; ret diff --git a/cranelift/filetests/filetests/isa/x64/unused_jt_unreachable_block.clif b/cranelift/filetests/filetests/isa/x64/unused_jt_unreachable_block.clif index 3840689b9b..7160805ddb 100644 --- a/cranelift/filetests/filetests/isa/x64/unused_jt_unreachable_block.clif +++ b/cranelift/filetests/filetests/isa/x64/unused_jt_unreachable_block.clif @@ -13,15 +13,10 @@ block1: trap unreachable } -; VCode_ShowWithRRU {{ -; Entry block: 0 -; Block 0: -; (original IR block: block0) -; (instruction range: 0 .. 5) -; Inst 0: pushq %rbp -; Inst 1: movq %rsp, %rbp -; Inst 2: movq %rbp, %rsp -; Inst 3: popq %rbp -; Inst 4: ret -; }} +; pushq %rbp +; movq %rsp, %rbp +; block0: +; movq %rbp, %rsp +; popq %rbp +; ret
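
A note on the regenerated expectations above, not part of the patch itself: with regalloc2, the x64 VCode printer emits the ABI prologue (`pushq %rbp; movq %rsp, %rbp`) ahead of the `block0:` label, and two-address instructions are printed in a three-operand form in which the tied source and the destination both appear explicitly, so a tied pair surfaces as a repeated register (for example `shrl $1, %r8d, %r8d` or `andl %r8d, %r9d, %r8d`). The tie itself is expressed to the allocator as a "reuse" constraint on the defining operand. Below is a minimal sketch of that constraint, assuming the `Operand` constructors from the regalloc2 0.1 API (`reg_use`, `reg_reuse_def`, `VReg::new`, `RegClass::Int`); the function and vreg indices are hypothetical, for illustration only.

use regalloc2::{Operand, RegClass, VReg};

// Hypothetical operand list for a two-address instruction such as `addl`:
// two read-only uses plus a def that must share its register with the
// first source, since x86 `addl` overwrites that source in place.
fn add32_operands() -> Vec<Operand> {
    let src1 = VReg::new(0, RegClass::Int);
    let src2 = VReg::new(1, RegClass::Int);
    let dst = VReg::new(2, RegClass::Int);
    vec![
        Operand::reg_use(src1), // read-only use
        Operand::reg_use(src2), // read-only use
        // The def is constrained to reuse the allocation given to
        // operand index 0 (src1).
        Operand::reg_reuse_def(dst, 0),
    ]
}

When the allocator honors the reuse constraint, both sides of the tie receive the same physical register, which is exactly the repetition visible in the listings above.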