diff --git a/build.rs b/build.rs index 42d786799f..9ee3b893c7 100644 --- a/build.rs +++ b/build.rs @@ -219,6 +219,9 @@ fn ignore(testsuite: &str, testname: &str, strategy: &str) -> bool { _ => (), }, "Cranelift" => match (testsuite, testname) { + // No simd support yet for s390x. + ("simd", _) if platform_is_s390x() => return true, + ("simd", _) if cfg!(feature = "old-x86-backend") => return true, // skip all SIMD tests on old backend. // These are new instructions that are not really implemented in any backend. ("simd", "simd_i8x16_arith2") @@ -243,3 +246,7 @@ fn ignore(testsuite: &str, testname: &str, strategy: &str) -> bool { fn platform_is_x64() -> bool { env::var("CARGO_CFG_TARGET_ARCH").unwrap() == "x86_64" } + +fn platform_is_s390x() -> bool { + env::var("CARGO_CFG_TARGET_ARCH").unwrap() == "s390x" +} diff --git a/cranelift/codegen/Cargo.toml b/cranelift/codegen/Cargo.toml index 9eb990f896..65b836f990 100644 --- a/cranelift/codegen/Cargo.toml +++ b/cranelift/codegen/Cargo.toml @@ -62,6 +62,7 @@ unwind = ["gimli"] x86 = [] arm64 = [] riscv = [] +s390x = [] arm32 = [] # Work-in-progress codegen backend for ARM. # Stub feature that does nothing, for Cargo-features compatibility: the new @@ -75,7 +76,8 @@ old-x86-backend = [] all-arch = [ "x86", "arm64", - "riscv" + "riscv", + "s390x" ] # For dependent crates that want to serialize some parts of cranelift diff --git a/cranelift/codegen/meta/src/isa/mod.rs b/cranelift/codegen/meta/src/isa/mod.rs index ed8db85f0d..34032842c2 100644 --- a/cranelift/codegen/meta/src/isa/mod.rs +++ b/cranelift/codegen/meta/src/isa/mod.rs @@ -6,6 +6,7 @@ use std::fmt; mod arm32; mod arm64; mod riscv; +mod s390x; pub(crate) mod x86; /// Represents known ISA target. @@ -15,6 +16,7 @@ pub enum Isa { X86, Arm32, Arm64, + S390x, } impl Isa { @@ -31,6 +33,7 @@ impl Isa { match arch { "riscv" => Some(Isa::Riscv), "aarch64" => Some(Isa::Arm64), + "s390x" => Some(Isa::S390x), x if ["x86_64", "i386", "i586", "i686"].contains(&x) => Some(Isa::X86), x if x.starts_with("arm") || arch.starts_with("thumb") => Some(Isa::Arm32), _ => None, @@ -39,7 +42,7 @@ impl Isa { /// Returns all supported isa targets. 
pub fn all() -> &'static [Isa] {
-        &[Isa::Riscv, Isa::X86, Isa::Arm32, Isa::Arm64]
+        &[Isa::Riscv, Isa::X86, Isa::Arm32, Isa::Arm64, Isa::S390x]
    }
}
@@ -51,6 +54,7 @@ impl fmt::Display for Isa {
            Isa::X86 => write!(f, "x86"),
            Isa::Arm32 => write!(f, "arm32"),
            Isa::Arm64 => write!(f, "arm64"),
+            Isa::S390x => write!(f, "s390x"),
        }
    }
}
@@ -62,6 +66,7 @@ pub(crate) fn define(isas: &[Isa], shared_defs: &mut SharedDefinitions) -> Vec<TargetIsa> {
            Isa::X86 => x86::define(shared_defs),
            Isa::Arm32 => arm32::define(shared_defs),
            Isa::Arm64 => arm64::define(shared_defs),
+            Isa::S390x => s390x::define(shared_defs),
        })
        .collect()
}
diff --git a/cranelift/codegen/meta/src/isa/s390x/mod.rs b/cranelift/codegen/meta/src/isa/s390x/mod.rs
new file mode 100644
index 0000000000..2ec1040553
--- /dev/null
+++ b/cranelift/codegen/meta/src/isa/s390x/mod.rs
@@ -0,0 +1,31 @@
+use crate::cdsl::cpu_modes::CpuMode;
+use crate::cdsl::instructions::{InstructionGroupBuilder, InstructionPredicateMap};
+use crate::cdsl::isa::TargetIsa;
+use crate::cdsl::recipes::Recipes;
+use crate::cdsl::regs::IsaRegsBuilder;
+use crate::cdsl::settings::SettingGroupBuilder;
+
+use crate::shared::Definitions as SharedDefinitions;
+
+pub(crate) fn define(shared_defs: &mut SharedDefinitions) -> TargetIsa {
+    let inst_group = InstructionGroupBuilder::new(&mut shared_defs.all_instructions).build();
+    let settings = SettingGroupBuilder::new("s390x").build();
+    let regs = IsaRegsBuilder::new().build();
+    let recipes = Recipes::new();
+    let encodings_predicates = InstructionPredicateMap::new();
+
+    let mut mode = CpuMode::new("s390x");
+    let expand = shared_defs.transform_groups.by_name("expand");
+    mode.legalize_default(expand);
+    let cpu_modes = vec![mode];
+
+    TargetIsa::new(
+        "s390x",
+        inst_group,
+        settings,
+        regs,
+        recipes,
+        cpu_modes,
+        encodings_predicates,
+    )
+}
diff --git a/cranelift/codegen/meta/src/lib.rs b/cranelift/codegen/meta/src/lib.rs
index ead2c4442f..29a545aad6 100644
--- a/cranelift/codegen/meta/src/lib.rs
+++ b/cranelift/codegen/meta/src/lib.rs
@@ -116,6 +116,9 @@ pub fn generate(
            isa::Isa::Arm64 => {
                // aarch64 doesn't have platform-specific settings.
            }
+            isa::Isa::S390x => {
+                // s390x doesn't have platform-specific settings.
+            }
            isa::Isa::Arm32 | isa::Isa::Riscv => todo!(),
        }
    }
diff --git a/cranelift/codegen/src/isa/mod.rs b/cranelift/codegen/src/isa/mod.rs
index a24f64a256..4df2d78193 100644
--- a/cranelift/codegen/src/isa/mod.rs
+++ b/cranelift/codegen/src/isa/mod.rs
@@ -91,6 +91,9 @@ mod arm32;
#[cfg(feature = "arm64")]
pub(crate) mod aarch64;
+#[cfg(feature = "s390x")]
+mod s390x;
+
pub mod unwind;
mod call_conv;
@@ -160,6 +163,7 @@ pub fn lookup_variant(triple: Triple, variant: BackendVariant) -> Result<Builder, LookupError> {
        (Architecture::Arm { .. }, _) => isa_builder!(arm32, (feature = "arm32"), triple),
        (Architecture::Aarch64 { .. }, _) => isa_builder!(aarch64, (feature = "arm64"), triple),
+        (Architecture::S390x { .. }, _) => isa_builder!(s390x, (feature = "s390x"), triple),
        _ => Err(LookupError::Unsupported),
    }
}
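To make the wiring above concrete, here is a minimal, self-contained sketch of the arch-string dispatch that the meta crate's `Isa::from_arch` performs once these hunks land (the enum and match mirror the patch; the `main` harness is purely illustrative):

```rust
// Mirrors the Isa::from_arch mapping from the patch above; standalone sketch only.
#[derive(Debug, PartialEq)]
enum Isa {
    Riscv,
    X86,
    Arm32,
    Arm64,
    S390x,
}

fn from_arch(arch: &str) -> Option<Isa> {
    match arch {
        "riscv" => Some(Isa::Riscv),
        "aarch64" => Some(Isa::Arm64),
        "s390x" => Some(Isa::S390x),
        x if ["x86_64", "i386", "i586", "i686"].contains(&x) => Some(Isa::X86),
        x if x.starts_with("arm") || x.starts_with("thumb") => Some(Isa::Arm32),
        _ => None,
    }
}

fn main() {
    // A host building on IBM Z reports CARGO_CFG_TARGET_ARCH=s390x,
    // which is also what build.rs's platform_is_s390x() checks for.
    assert_eq!(from_arch("s390x"), Some(Isa::S390x));
    assert_eq!(from_arch("sparc"), None);
}
```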
diff --git a/cranelift/codegen/src/isa/s390x/abi.rs b/cranelift/codegen/src/isa/s390x/abi.rs
new file mode 100644
index 0000000000..14344e5866
--- /dev/null
+++ b/cranelift/codegen/src/isa/s390x/abi.rs
@@ -0,0 +1,770 @@
+//! Implementation of a standard S390x ABI.
+//!
+//! This machine uses the "vanilla" ABI implementation from abi_impl.rs,
+//! however a few details are different from the description there:
+//!
+//! - On s390x, the caller must provide a "register save area" of 160
+//!   bytes to any function it calls. The called function is free to use
+//!   this space for any purpose; usually to save callee-saved GPRs.
+//!   (Note that while this area is allocated by the caller, it is counted
+//!   as part of the callee's stack frame; in particular, the callee's CFA
+//!   is the top of the register save area, not the incoming SP value.)
+//!
+//! - Overflow arguments are passed on the stack starting immediately
+//!   above the register save area.  On s390x, this space is allocated
+//!   only once directly in the prologue, using a size large enough to
+//!   hold overflow arguments for every call in the function.
+//!
+//! - On s390x we do not use a frame pointer register; instead, every
+//!   element of the stack frame is addressed via (constant) offsets
+//!   from the stack pointer.  Note that due to the above (and because
+//!   there are no variable-sized stack allocations in cranelift), the
+//!   value of the stack pointer register never changes after the
+//!   initial allocation in the function prologue.
+//!
+//! Overall, the stack frame layout on s390x is as follows:
+//!
+//! ```plain
+//!   (high address)
+//!
+//!                              +---------------------------+
+//!                              |          ...              |
+//! CFA                  ----->  | stack args                |
+//!                              +---------------------------+
+//!                              |          ...              |
+//!                              | 160 bytes reg save area   |
+//! SP at function entry ----->  | (used to save GPRs)       |
+//!                              +---------------------------+
+//!                              |          ...              |
+//!                              | clobbered callee-saves    |
+//!                              | (used to save FPRs)       |
+//! unwind-frame base     ---->  | (alloc'd by prologue)     |
+//!                              +---------------------------+
+//!                              |          ...              |
+//!                              | spill slots               |
+//!                              | (accessed via nominal SP) |
+//!                              |          ...              |
+//!                              | stack slots               |
+//!                              | (accessed via nominal SP) |
+//! nominal SP --------------->  | (alloc'd by prologue)     |
+//!                              +---------------------------+
+//!                              |          ...              |
+//!                              | args for call             |
+//!                              | outgoing reg save area    |
+//! SP during function  ------>  | (alloc'd by prologue)     |
+//!                              +---------------------------+
+//!
+//!   (low address)
+//! ```
+
+use crate::ir;
+use crate::ir::condcodes::IntCC;
+use crate::ir::types;
+use crate::ir::MemFlags;
+use crate::ir::Type;
+use crate::isa;
+use crate::isa::s390x::inst::*;
+use crate::isa::unwind::UnwindInst;
+use crate::machinst::*;
+use crate::settings;
+use crate::{CodegenError, CodegenResult};
+use alloc::boxed::Box;
+use alloc::vec::Vec;
+use regalloc::{RealReg, Reg, RegClass, Set, Writable};
+use smallvec::{smallvec, SmallVec};
+use std::convert::TryFrom;
+
+// We use a generic implementation that factors out ABI commonalities.
+
+/// Support for the S390x ABI from the callee side (within a function body).
+pub type S390xABICallee = ABICalleeImpl<S390xMachineDeps>;
+
+/// Support for the S390x ABI from the caller side (at a callsite).
+pub type S390xABICaller = ABICallerImpl<S390xMachineDeps>;
+
+/// ABI Register usage
+
+fn in_int_reg(ty: Type) -> bool {
+    match ty {
+        types::I8 | types::I16 | types::I32 | types::I64 | types::R64 => true,
+        types::B1 | types::B8 | types::B16 | types::B32 | types::B64 => true,
+        _ => false,
+    }
+}
+
+fn in_flt_reg(ty: Type) -> bool {
+    match ty {
+        types::F32 | types::F64 => true,
+        _ => false,
+    }
+}
+
+fn get_intreg_for_arg(idx: usize) -> Option<Reg> {
+    match idx {
+        0 => Some(regs::gpr(2)),
+        1 => Some(regs::gpr(3)),
+        2 => Some(regs::gpr(4)),
+        3 => Some(regs::gpr(5)),
+        4 => Some(regs::gpr(6)),
+        _ => None,
+    }
+}
+
+fn get_fltreg_for_arg(idx: usize) -> Option<Reg> {
+    match idx {
+        0 => Some(regs::fpr(0)),
+        1 => Some(regs::fpr(2)),
+        2 => Some(regs::fpr(4)),
+        3 => Some(regs::fpr(6)),
+        _ => None,
+    }
+}
+
+fn get_intreg_for_ret(idx: usize) -> Option<Reg> {
+    match idx {
+        0 => Some(regs::gpr(2)),
+        // ABI extension to support multi-value returns:
+        1 => Some(regs::gpr(3)),
+        2 => Some(regs::gpr(4)),
+        3 => Some(regs::gpr(5)),
+        _ => None,
+    }
+}
+
+fn get_fltreg_for_ret(idx: usize) -> Option<Reg> {
+    match idx {
+        0 => Some(regs::fpr(0)),
+        // ABI extension to support multi-value returns:
+        1 => Some(regs::fpr(2)),
+        2 => Some(regs::fpr(4)),
+        3 => Some(regs::fpr(6)),
+        _ => None,
+    }
+}
+
+/// This is the limit for the size of argument and return-value areas on the
+/// stack. We place a reasonable limit here to avoid integer overflow issues
+/// with 32-bit arithmetic: for now, 128 MB.
+static STACK_ARG_RET_SIZE_LIMIT: u64 = 128 * 1024 * 1024;
+
+impl Into<MemArg> for StackAMode {
+    fn into(self) -> MemArg {
+        match self {
+            StackAMode::FPOffset(off, _ty) => MemArg::InitialSPOffset { off },
+            StackAMode::NominalSPOffset(off, _ty) => MemArg::NominalSPOffset { off },
+            StackAMode::SPOffset(off, _ty) => {
+                MemArg::reg_plus_off(stack_reg(), off, MemFlags::trusted())
+            }
+        }
+    }
+}
+
+/// S390x-specific ABI behavior. This struct just serves as an implementation
+/// point for the trait; it is never actually instantiated.
+pub struct S390xMachineDeps;
+
+impl ABIMachineSpec for S390xMachineDeps {
+    type I = Inst;
+
+    fn word_bits() -> u32 {
+        64
+    }
+
+    /// Return required stack alignment in bytes.
+    fn stack_align(_call_conv: isa::CallConv) -> u32 {
+        8
+    }
+
+    fn compute_arg_locs(
+        call_conv: isa::CallConv,
+        _flags: &settings::Flags,
+        params: &[ir::AbiParam],
+        args_or_rets: ArgsOrRets,
+        add_ret_area_ptr: bool,
+    ) -> CodegenResult<(Vec<ABIArg>, i64, Option<usize>)> {
+        let mut next_gpr = 0;
+        let mut next_fpr = 0;
+        let mut next_stack: u64 = 0;
+        let mut ret = vec![];
+
+        if args_or_rets == ArgsOrRets::Args {
+            next_stack = 160;
+        }
+
+        for i in 0..params.len() {
+            let param = &params[i];
+
+            // Validate "purpose".
+            match &param.purpose {
+                &ir::ArgumentPurpose::VMContext
+                | &ir::ArgumentPurpose::Normal
+                | &ir::ArgumentPurpose::StackLimit
+                | &ir::ArgumentPurpose::SignatureId => {}
+                _ => panic!(
+                    "Unsupported argument purpose {:?} in signature: {:?}",
+                    param.purpose, params
+                ),
+            }
+
+            let intreg = in_int_reg(param.value_type);
+            let fltreg = in_flt_reg(param.value_type);
+            debug_assert!(intreg || fltreg);
+            debug_assert!(!(intreg && fltreg));
+
+            let (next_reg, candidate) = if intreg {
+                let candidate = match args_or_rets {
+                    ArgsOrRets::Args => get_intreg_for_arg(next_gpr),
+                    ArgsOrRets::Rets => get_intreg_for_ret(next_gpr),
+                };
+                (&mut next_gpr, candidate)
+            } else {
+                let candidate = match args_or_rets {
+                    ArgsOrRets::Args => get_fltreg_for_arg(next_fpr),
+                    ArgsOrRets::Rets => get_fltreg_for_ret(next_fpr),
+                };
+                (&mut next_fpr, candidate)
+            };
+
+            // In the Wasmtime ABI only the first return value can be in a register.
+            let candidate =
+                if call_conv.extends_wasmtime() && args_or_rets == ArgsOrRets::Rets && i > 0 {
+                    None
+                } else {
+                    candidate
+                };
+
+            if let Some(reg) = candidate {
+                ret.push(ABIArg::reg(
+                    reg.to_real_reg(),
+                    param.value_type,
+                    param.extension,
+                    param.purpose,
+                ));
+                *next_reg += 1;
+            } else {
+                // Compute size. Every argument or return value takes a slot of
+                // at least 8 bytes, except for return values in the Wasmtime ABI.
+                let size = (ty_bits(param.value_type) / 8) as u64;
+                let slot_size = if call_conv.extends_wasmtime() && args_or_rets == ArgsOrRets::Rets
+                {
+                    size
+                } else {
+                    std::cmp::max(size, 8)
+                };
+
+                // Align the stack slot.
+                debug_assert!(slot_size.is_power_of_two());
+                next_stack = align_to(next_stack, slot_size);
+
+                // If the type is actually of smaller size (and the argument
+                // was not extended), it is passed right-aligned.
+                let offset = if size < slot_size && param.extension == ir::ArgumentExtension::None {
+                    slot_size - size
+                } else {
+                    0
+                };
+                ret.push(ABIArg::stack(
+                    (next_stack + offset) as i64,
+                    param.value_type,
+                    param.extension,
+                    param.purpose,
+                ));
+                next_stack += slot_size;
+            }
+        }
+
+        next_stack = align_to(next_stack, 8);
+
+        let extra_arg = if add_ret_area_ptr {
+            debug_assert!(args_or_rets == ArgsOrRets::Args);
+            if let Some(reg) = get_intreg_for_arg(next_gpr) {
+                ret.push(ABIArg::reg(
+                    reg.to_real_reg(),
+                    types::I64,
+                    ir::ArgumentExtension::None,
+                    ir::ArgumentPurpose::Normal,
+                ));
+            } else {
+                ret.push(ABIArg::stack(
+                    next_stack as i64,
+                    types::I64,
+                    ir::ArgumentExtension::None,
+                    ir::ArgumentPurpose::Normal,
+                ));
+                next_stack += 8;
+            }
+            Some(ret.len() - 1)
+        } else {
+            None
+        };
+
+        // To avoid overflow issues, limit the arg/return size to something
+        // reasonable -- here, 128 MB.
+        if next_stack > STACK_ARG_RET_SIZE_LIMIT {
+            return Err(CodegenError::ImplLimitExceeded);
+        }
+
+        Ok((ret, next_stack as i64, extra_arg))
+    }
+
+    fn fp_to_arg_offset(_call_conv: isa::CallConv, _flags: &settings::Flags) -> i64 {
+        0
+    }
+
+    fn gen_load_stack(mem: StackAMode, into_reg: Writable<Reg>, ty: Type) -> Inst {
+        Inst::gen_load(into_reg, mem.into(), ty)
+    }
+
+    fn gen_store_stack(mem: StackAMode, from_reg: Reg, ty: Type) -> Inst {
+        Inst::gen_store(mem.into(), from_reg, ty)
+    }
+
+    fn gen_move(to_reg: Writable<Reg>, from_reg: Reg, ty: Type) -> Inst {
+        Inst::gen_move(to_reg, from_reg, ty)
+    }
+
+    fn gen_extend(
+        to_reg: Writable<Reg>,
+        from_reg: Reg,
+        signed: bool,
+        from_bits: u8,
+        to_bits: u8,
+    ) -> Inst {
+        assert!(from_bits < to_bits);
+        Inst::Extend {
+            rd: to_reg,
+            rn: from_reg,
+            signed,
+            from_bits,
+            to_bits,
+        }
+    }
+
+    fn gen_ret() -> Inst {
+        Inst::Ret { link: gpr(14) }
+    }
+
+    fn gen_add_imm(into_reg: Writable<Reg>, from_reg: Reg, imm: u32) -> SmallInstVec<Inst> {
+        let mut insts = SmallVec::new();
+        if let Some(imm) = UImm12::maybe_from_u64(imm as u64) {
+            insts.push(Inst::LoadAddr {
+                rd: into_reg,
+                mem: MemArg::BXD12 {
+                    base: from_reg,
+                    index: zero_reg(),
+                    disp: imm,
+                    flags: MemFlags::trusted(),
+                },
+            });
+        } else if let Some(imm) = SImm20::maybe_from_i64(imm as i64) {
+            insts.push(Inst::LoadAddr {
+                rd: into_reg,
+                mem: MemArg::BXD20 {
+                    base: from_reg,
+                    index: zero_reg(),
+                    disp: imm,
+                    flags: MemFlags::trusted(),
+                },
+            });
+        } else {
+            if from_reg != into_reg.to_reg() {
+                insts.push(Inst::mov64(into_reg, from_reg));
+            }
+            insts.push(Inst::AluRUImm32 {
+                alu_op: ALUOp::Add64,
+                rd: into_reg,
+                imm,
+            });
+        }
+        insts
+    }
+
+    fn gen_stack_lower_bound_trap(limit_reg: Reg) -> SmallInstVec<Inst> {
+        let mut insts = SmallVec::new();
+        insts.push(Inst::CmpTrapRR {
+            op: CmpOp::CmpL64,
+            rn: stack_reg(),
+            rm: limit_reg,
+            cond: Cond::from_intcc(IntCC::UnsignedLessThanOrEqual),
+            trap_code: ir::TrapCode::StackOverflow,
+        });
+        insts
+    }
+
+    fn gen_epilogue_placeholder() -> Inst {
+        Inst::EpiloguePlaceholder
+    }
+
+    fn gen_get_stack_addr(mem: StackAMode, into_reg: Writable<Reg>, _ty: Type) -> Inst {
+        let mem = mem.into();
+        Inst::LoadAddr { rd: into_reg, mem }
+    }
+
+    fn get_stacklimit_reg() -> Reg {
+        spilltmp_reg()
+    }
+
+    fn gen_load_base_offset(into_reg: Writable<Reg>, base: Reg, offset: i32, ty: Type) -> Inst {
+        let mem = MemArg::reg_plus_off(base, offset.into(), MemFlags::trusted());
+        Inst::gen_load(into_reg, mem, ty)
+    }
+
+    fn gen_store_base_offset(base: Reg, offset: i32, from_reg: Reg, ty: Type) -> Inst {
+        let mem = MemArg::reg_plus_off(base, offset.into(), MemFlags::trusted());
+        Inst::gen_store(mem, from_reg, ty)
+    }
+
+    fn gen_sp_reg_adjust(imm: i32) -> SmallInstVec<Inst> {
+        if imm == 0 {
+            return SmallVec::new();
+        }
+
+        let mut insts = SmallVec::new();
+        if let Ok(imm) = i16::try_from(imm) {
+            insts.push(Inst::AluRSImm16 {
+                alu_op: ALUOp::Add64,
+                rd: writable_stack_reg(),
+                imm,
+            });
+        } else {
+            insts.push(Inst::AluRSImm32 {
+                alu_op: ALUOp::Add64,
+                rd: writable_stack_reg(),
+                imm,
+            });
+        }
+        insts
+    }
+
+    fn gen_nominal_sp_adj(offset: i32) -> Inst {
+        Inst::VirtualSPOffsetAdj {
+            offset: offset.into(),
+        }
+    }
+
+    fn gen_prologue_frame_setup(_flags: &settings::Flags) -> SmallInstVec<Inst> {
+        SmallVec::new()
+    }
+
+    fn gen_epilogue_frame_restore(_flags: &settings::Flags) -> SmallInstVec<Inst> {
+        SmallVec::new()
+    }
+
+    fn gen_probestack(_: u32) -> SmallInstVec<Inst> {
+        // TODO: implement if we ever require stack probes on an s390x host
+        // (unlikely unless Lucet is ported)
+        smallvec![]
+    }
+
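As an aside on `gen_add_imm` above: it tries the tightest addressing form first and falls back outward. A standalone sketch of that three-tier choice, assuming the `UImm12`/`SImm20` ranges implied by the patch (0..=4095 unsigned, and a signed 20-bit range), runnable on its own:

```rust
// Illustrative only: classifies an offset the way gen_add_imm picks an
// encoding. The real code builds LoadAddr/AluRUImm32 instructions instead
// of returning strings.
fn classify_disp(off: i64) -> &'static str {
    if (0..4096).contains(&off) {
        "BXD12: 12-bit unsigned displacement (LA)"
    } else if (-524_288..524_288).contains(&off) {
        "BXD20: 20-bit signed displacement (LAY)"
    } else {
        "materialize into a register, then add"
    }
}

fn main() {
    assert_eq!(classify_disp(160), "BXD12: 12-bit unsigned displacement (LA)");
    assert_eq!(classify_disp(-8), "BXD20: 20-bit signed displacement (LAY)");
    assert_eq!(classify_disp(1 << 20), "materialize into a register, then add");
}
```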
+    // Returns stack bytes used as well as instructions. Does not adjust
+    // nominal SP offset; abi_impl generic code will do that.
+    fn gen_clobber_save(
+        call_conv: isa::CallConv,
+        flags: &settings::Flags,
+        clobbers: &Set<Writable<RealReg>>,
+        fixed_frame_storage_size: u32,
+        outgoing_args_size: u32,
+    ) -> (u64, SmallVec<[Inst; 16]>) {
+        let mut insts = SmallVec::new();
+
+        // Collect clobbered registers.
+        let (clobbered_gpr, clobbered_fpr) = get_regs_saved_in_prologue(call_conv, clobbers);
+        let mut first_clobbered_gpr = 16;
+        for reg in clobbered_gpr {
+            let enc = reg.to_reg().get_hw_encoding();
+            if enc < first_clobbered_gpr {
+                first_clobbered_gpr = enc;
+            }
+        }
+        let clobber_size = clobbered_fpr.len() * 8;
+        if flags.unwind_info() {
+            insts.push(Inst::Unwind {
+                inst: UnwindInst::DefineNewFrame {
+                    offset_upward_to_caller_sp: 160,
+                    offset_downward_to_clobbers: clobber_size as u32,
+                },
+            });
+        }
+
+        // Use STMG to save clobbered GPRs into save area.
+        if first_clobbered_gpr < 16 {
+            let offset = 8 * first_clobbered_gpr as i64;
+            insts.push(Inst::StoreMultiple64 {
+                rt: gpr(first_clobbered_gpr as u8),
+                rt2: gpr(15),
+                addr_reg: stack_reg(),
+                addr_off: SImm20::maybe_from_i64(offset).unwrap(),
+            });
+        }
+        if flags.unwind_info() {
+            for i in first_clobbered_gpr..16 {
+                insts.push(Inst::Unwind {
+                    inst: UnwindInst::SaveReg {
+                        clobber_offset: clobber_size as u32 + (i * 8) as u32,
+                        reg: gpr(i as u8).to_real_reg(),
+                    },
+                });
+            }
+        }
+
+        // Decrement stack pointer.
+        let stack_size =
+            outgoing_args_size as i32 + clobber_size as i32 + fixed_frame_storage_size as i32;
+        insts.extend(Self::gen_sp_reg_adjust(-stack_size));
+        if flags.unwind_info() {
+            insts.push(Inst::Unwind {
+                inst: UnwindInst::StackAlloc {
+                    size: stack_size as u32,
+                },
+            });
+        }
+
+        let sp_adj = outgoing_args_size as i32;
+        if sp_adj > 0 {
+            insts.push(Self::gen_nominal_sp_adj(sp_adj));
+        }
+
+        // Save FPRs.
+        for (i, reg) in clobbered_fpr.iter().enumerate() {
+            insts.push(Inst::FpuStore64 {
+                rd: reg.to_reg().to_reg(),
+                mem: MemArg::reg_plus_off(
+                    stack_reg(),
+                    (i * 8) as i64 + outgoing_args_size as i64 + fixed_frame_storage_size as i64,
+                    MemFlags::trusted(),
+                ),
+            });
+            if flags.unwind_info() {
+                insts.push(Inst::Unwind {
+                    inst: UnwindInst::SaveReg {
+                        clobber_offset: (i * 8) as u32,
+                        reg: reg.to_reg(),
+                    },
+                });
+            }
+        }
+
+        (clobber_size as u64, insts)
+    }
+
+    fn gen_clobber_restore(
+        call_conv: isa::CallConv,
+        _: &settings::Flags,
+        clobbers: &Set<Writable<RealReg>>,
+        fixed_frame_storage_size: u32,
+        outgoing_args_size: u32,
+    ) -> SmallVec<[Inst; 16]> {
+        let mut insts = SmallVec::new();
+
+        // Collect clobbered registers.
+        let (clobbered_gpr, clobbered_fpr) = get_regs_saved_in_prologue(call_conv, clobbers);
+        let mut first_clobbered_gpr = 16;
+        for reg in clobbered_gpr {
+            let enc = reg.to_reg().get_hw_encoding();
+            if enc < first_clobbered_gpr {
+                first_clobbered_gpr = enc;
+            }
+        }
+        let clobber_size = clobbered_fpr.len() * 8;
+
+        // Restore FPRs.
+        for (i, reg) in clobbered_fpr.iter().enumerate() {
+            insts.push(Inst::FpuLoad64 {
+                rd: Writable::from_reg(reg.to_reg().to_reg()),
+                mem: MemArg::reg_plus_off(
+                    stack_reg(),
+                    (i * 8) as i64 + outgoing_args_size as i64 + fixed_frame_storage_size as i64,
+                    MemFlags::trusted(),
+                ),
+            });
+        }
+
+        // Increment stack pointer unless it will be restored implicitly.
+        let stack_size =
+            outgoing_args_size as i32 + clobber_size as i32 + fixed_frame_storage_size as i32;
+        let implicit_sp_restore = first_clobbered_gpr < 16
+            && SImm20::maybe_from_i64(8 * first_clobbered_gpr as i64 + stack_size as i64).is_some();
+        if !implicit_sp_restore {
+            insts.extend(Self::gen_sp_reg_adjust(stack_size));
+        }
+
+        // Use LMG to restore clobbered GPRs from save area.
+        if first_clobbered_gpr < 16 {
+            let mut offset = 8 * first_clobbered_gpr as i64;
+            if implicit_sp_restore {
+                offset += stack_size as i64;
+            }
+            insts.push(Inst::LoadMultiple64 {
+                rt: writable_gpr(first_clobbered_gpr as u8),
+                rt2: writable_gpr(15),
+                addr_reg: stack_reg(),
+                addr_off: SImm20::maybe_from_i64(offset).unwrap(),
+            });
+        }
+
+        insts
+    }
+
+    fn gen_call(
+        dest: &CallDest,
+        uses: Vec<Reg>,
+        defs: Vec<Writable<Reg>>,
+        opcode: ir::Opcode,
+        tmp: Writable<Reg>,
+        _callee_conv: isa::CallConv,
+        _caller_conv: isa::CallConv,
+    ) -> SmallVec<[(InstIsSafepoint, Inst); 2]> {
+        let mut insts = SmallVec::new();
+        match &dest {
+            &CallDest::ExtName(ref name, RelocDistance::Near) => insts.push((
+                InstIsSafepoint::Yes,
+                Inst::Call {
+                    link: writable_gpr(14),
+                    info: Box::new(CallInfo {
+                        dest: name.clone(),
+                        uses,
+                        defs,
+                        opcode,
+                    }),
+                },
+            )),
+            &CallDest::ExtName(ref name, RelocDistance::Far) => {
+                insts.push((
+                    InstIsSafepoint::No,
+                    Inst::LoadExtNameFar {
+                        rd: tmp,
+                        name: Box::new(name.clone()),
+                        offset: 0,
+                    },
+                ));
+                insts.push((
+                    InstIsSafepoint::Yes,
+                    Inst::CallInd {
+                        link: writable_gpr(14),
+                        info: Box::new(CallIndInfo {
+                            rn: tmp.to_reg(),
+                            uses,
+                            defs,
+                            opcode,
+                        }),
+                    },
+                ));
+            }
+            &CallDest::Reg(reg) => insts.push((
+                InstIsSafepoint::Yes,
+                Inst::CallInd {
+                    link: writable_gpr(14),
+                    info: Box::new(CallIndInfo {
+                        rn: *reg,
+                        uses,
+                        defs,
+                        opcode,
+                    }),
+                },
+            )),
+        }
+
+        insts
+    }
+
+    fn gen_memcpy(
+        _call_conv: isa::CallConv,
+        _dst: Reg,
+        _src: Reg,
+        _size: usize,
+    ) -> SmallVec<[Self::I; 8]> {
+        unimplemented!("StructArgs not implemented for S390X yet");
+    }
+
+    fn get_number_of_spillslots_for_value(rc: RegClass, ty: Type) -> u32 {
+        // We allocate in terms of 8-byte slots.
+        match (rc, ty) {
+            (RegClass::I64, _) => 1,
+            (RegClass::F64, _) => 1,
+            _ => panic!("Unexpected register class!"),
+        }
+    }
+
+    /// Get the current virtual-SP offset from an instruction-emission state.
+    fn get_virtual_sp_offset_from_state(s: &EmitState) -> i64 {
+        s.virtual_sp_offset
+    }
+
+    /// Get the nominal-SP-to-FP offset from an instruction-emission state.
+    fn get_nominal_sp_to_fp(s: &EmitState) -> i64 {
+        s.initial_sp_offset
+    }
+
+    fn get_regs_clobbered_by_call(call_conv_of_callee: isa::CallConv) -> Vec<Writable<Reg>> {
+        let mut caller_saved = Vec::new();
+        for i in 0..15 {
+            let x = writable_gpr(i);
+            if is_reg_clobbered_by_call(call_conv_of_callee, x.to_reg().to_real_reg()) {
+                caller_saved.push(x);
+            }
+        }
+        for i in 0..15 {
+            let v = writable_fpr(i);
+            if is_reg_clobbered_by_call(call_conv_of_callee, v.to_reg().to_real_reg()) {
+                caller_saved.push(v);
+            }
+        }
+        caller_saved
+    }
+
+    fn get_ext_mode(
+        _call_conv: isa::CallConv,
+        specified: ir::ArgumentExtension,
+    ) -> ir::ArgumentExtension {
+        specified
+    }
+}
+
+fn is_reg_saved_in_prologue(_call_conv: isa::CallConv, r: RealReg) -> bool {
+    match r.get_class() {
+        RegClass::I64 => {
+            // r6 - r15 inclusive are callee-saves.
+            r.get_hw_encoding() >= 6 && r.get_hw_encoding() <= 15
+        }
+        RegClass::F64 => {
+            // f8 - f15 inclusive are callee-saves.
+            r.get_hw_encoding() >= 8 && r.get_hw_encoding() <= 15
+        }
+        _ => panic!("Unexpected RegClass"),
+    }
+}
+
+fn get_regs_saved_in_prologue(
+    call_conv: isa::CallConv,
+    regs: &Set<Writable<RealReg>>,
+) -> (Vec<Writable<RealReg>>, Vec<Writable<RealReg>>) {
+    let mut int_saves = vec![];
+    let mut fpr_saves = vec![];
+    for &reg in regs.iter() {
+        if is_reg_saved_in_prologue(call_conv, reg.to_reg()) {
+            match reg.to_reg().get_class() {
+                RegClass::I64 => int_saves.push(reg),
+                RegClass::F64 => fpr_saves.push(reg),
+                _ => panic!("Unexpected RegClass"),
+            }
+        }
+    }
+    // Sort registers for deterministic code output.
+    int_saves.sort_by_key(|r| r.to_reg().get_index());
+    fpr_saves.sort_by_key(|r| r.to_reg().get_index());
+    (int_saves, fpr_saves)
+}
+
+fn is_reg_clobbered_by_call(_call_conv: isa::CallConv, r: RealReg) -> bool {
+    match r.get_class() {
+        RegClass::I64 => {
+            // r0 - r5 inclusive are caller-saves.
+            r.get_hw_encoding() <= 5
+        }
+        RegClass::F64 => {
+            // f0 - f7 inclusive are caller-saves.
+            r.get_hw_encoding() <= 7
+        }
+        _ => panic!("Unexpected RegClass"),
+    }
+}
diff --git a/cranelift/codegen/src/isa/s390x/inst/args.rs b/cranelift/codegen/src/isa/s390x/inst/args.rs
new file mode 100644
index 0000000000..75ee5cbcfe
--- /dev/null
+++ b/cranelift/codegen/src/isa/s390x/inst/args.rs
@@ -0,0 +1,317 @@
+//! S390x ISA definitions: instruction arguments.
+
+// Some variants are never constructed, but we still want them as options in the future.
+#![allow(dead_code)]
+
+use crate::ir::condcodes::{FloatCC, IntCC};
+use crate::ir::MemFlags;
+use crate::isa::s390x::inst::*;
+use crate::machinst::MachLabel;
+
+use regalloc::{PrettyPrint, RealRegUniverse, Reg};
+
+use std::string::String;
+
+//=============================================================================
+// Instruction sub-components (memory addresses): definitions
+
+/// A memory argument to load/store, encapsulating the possible addressing modes.
+#[derive(Clone, Debug)]
+pub enum MemArg {
+    //
+    // Real IBM Z addressing modes:
+    //
+    /// Base register, index register, and 12-bit unsigned displacement.
+    BXD12 {
+        base: Reg,
+        index: Reg,
+        disp: UImm12,
+        flags: MemFlags,
+    },
+
+    /// Base register, index register, and 20-bit signed displacement.
+    BXD20 {
+        base: Reg,
+        index: Reg,
+        disp: SImm20,
+        flags: MemFlags,
+    },
+
+    /// PC-relative Reference to a label.
+    Label { target: BranchTarget },
+
+    /// PC-relative Reference to a near symbol.
+    Symbol {
+        name: Box<ExternalName>,
+        offset: i32,
+        flags: MemFlags,
+    },
+
+    //
+    // Virtual addressing modes that are lowered at emission time:
+    //
+    /// Arbitrary offset from a register. Converted to generation of large
+    /// offsets with multiple instructions as necessary during code emission.
+    RegOffset { reg: Reg, off: i64, flags: MemFlags },
+
+    /// Offset from the stack pointer at function entry.
+    InitialSPOffset { off: i64 },
+
+    /// Offset from the "nominal stack pointer", which is where the real SP is
+    /// just after stack and spill slots are allocated in the function prologue.
+    /// At emission time, this is converted to `SPOffset` with a fixup added to
+    /// the offset constant. The fixup is a running value that is tracked as
+    /// emission iterates through instructions in linear order, and can be
+    /// adjusted up and down with [Inst::VirtualSPOffsetAdj].
+    ///
+    /// The standard ABI is in charge of handling this (by emitting the
+    /// adjustment meta-instructions). It maintains the invariant that "nominal
+    /// SP" is where the actual SP is after the function prologue and before
+    /// clobber pushes. See the diagram in the documentation for
+    /// [crate::isa::s390x::abi](the ABI module) for more details.
+    NominalSPOffset { off: i64 },
+}
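Before the `MemArg` helpers, a small illustration of how the two virtual stack modes above resolve at emission time. This mirrors the adjustment arithmetic in `mem_finalize` later in this patch; `EmitState` here is a stand-in struct with the same field names, not the real type:

```rust
// Stand-in for the real EmitState; only the two SP-tracking fields matter here.
struct EmitState {
    initial_sp_offset: i64,
    virtual_sp_offset: i64,
}

// InitialSPOffset is relative to SP at function entry, so the whole frame
// (initial_sp_offset) plus any outstanding fixup applies; NominalSPOffset
// only needs the running virtual-SP fixup.
fn resolve_sp_offset(off: i64, nominal: bool, state: &EmitState) -> i64 {
    let adj = if nominal {
        state.virtual_sp_offset
    } else {
        state.initial_sp_offset + state.virtual_sp_offset
    };
    off + adj
}

fn main() {
    // Hypothetical frame: 200-byte frame, 32-byte outstanding fixup.
    let state = EmitState { initial_sp_offset: 200, virtual_sp_offset: 32 };
    assert_eq!(resolve_sp_offset(8, true, &state), 40); // nominal-SP slot
    assert_eq!(resolve_sp_offset(-8, false, &state), 224); // initial-SP relative
}
```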
+
+impl MemArg {
+    /// Memory reference using an address in a register.
+    pub fn reg(reg: Reg, flags: MemFlags) -> MemArg {
+        MemArg::BXD12 {
+            base: reg,
+            index: zero_reg(),
+            disp: UImm12::zero(),
+            flags,
+        }
+    }
+
+    /// Memory reference using the sum of two registers as an address.
+    pub fn reg_plus_reg(reg1: Reg, reg2: Reg, flags: MemFlags) -> MemArg {
+        MemArg::BXD12 {
+            base: reg1,
+            index: reg2,
+            disp: UImm12::zero(),
+            flags,
+        }
+    }
+
+    /// Memory reference using the sum of a register and an offset as address.
+    pub fn reg_plus_off(reg: Reg, off: i64, flags: MemFlags) -> MemArg {
+        MemArg::RegOffset { reg, off, flags }
+    }
+
+    pub(crate) fn get_flags(&self) -> MemFlags {
+        match self {
+            MemArg::BXD12 { flags, .. } => *flags,
+            MemArg::BXD20 { flags, .. } => *flags,
+            MemArg::RegOffset { flags, .. } => *flags,
+            MemArg::Label { .. } => MemFlags::trusted(),
+            MemArg::Symbol { flags, .. } => *flags,
+            MemArg::InitialSPOffset { .. } => MemFlags::trusted(),
+            MemArg::NominalSPOffset { .. } => MemFlags::trusted(),
+        }
+    }
+
+    pub(crate) fn can_trap(&self) -> bool {
+        !self.get_flags().notrap()
+    }
+}
+
+//=============================================================================
+// Instruction sub-components (conditions, branches and branch targets):
+// definitions
+
+/// Condition for conditional branches.
+#[derive(Clone, Copy, Debug, PartialEq, Eq)]
+pub struct Cond {
+    mask: u8,
+}
+
+impl Cond {
+    pub fn from_mask(mask: u8) -> Cond {
+        assert!(mask >= 1 && mask <= 14);
+        Cond { mask }
+    }
+
+    pub fn from_intcc(cc: IntCC) -> Cond {
+        let mask = match cc {
+            IntCC::Equal => 8,
+            IntCC::NotEqual => 4 | 2,
+            IntCC::SignedGreaterThanOrEqual => 8 | 2,
+            IntCC::SignedGreaterThan => 2,
+            IntCC::SignedLessThanOrEqual => 8 | 4,
+            IntCC::SignedLessThan => 4,
+            IntCC::UnsignedGreaterThanOrEqual => 8 | 2,
+            IntCC::UnsignedGreaterThan => 2,
+            IntCC::UnsignedLessThanOrEqual => 8 | 4,
+            IntCC::UnsignedLessThan => 4,
+            IntCC::Overflow => 1,
+            IntCC::NotOverflow => 8 | 4 | 2,
+        };
+        Cond { mask }
+    }
+
+    pub fn from_floatcc(cc: FloatCC) -> Cond {
+        let mask = match cc {
+            FloatCC::Ordered => 8 | 4 | 2,
+            FloatCC::Unordered => 1,
+            FloatCC::Equal => 8,
+            FloatCC::NotEqual => 4 | 2 | 1,
+            FloatCC::OrderedNotEqual => 4 | 2,
+            FloatCC::UnorderedOrEqual => 8 | 1,
+            FloatCC::LessThan => 4,
+            FloatCC::LessThanOrEqual => 8 | 4,
+            FloatCC::GreaterThan => 2,
+            FloatCC::GreaterThanOrEqual => 8 | 2,
+            FloatCC::UnorderedOrLessThan => 4 | 1,
+            FloatCC::UnorderedOrLessThanOrEqual => 8 | 4 | 1,
+            FloatCC::UnorderedOrGreaterThan => 2 | 1,
+            FloatCC::UnorderedOrGreaterThanOrEqual => 8 | 2 | 1,
+        };
+        Cond { mask }
+    }
+
+    /// Return the inverted condition.
+    pub fn invert(self) -> Cond {
+        Cond {
+            mask: !self.mask & 15,
+        }
+    }
+
+    /// Return the machine encoding of this condition.
+    pub fn bits(self) -> u8 {
+        self.mask
+    }
+}
+
+/// A branch target. Either unresolved (basic-block index) or resolved (offset
+/// from end of current instruction).
+#[derive(Clone, Copy, Debug, PartialEq, Eq)]
+pub enum BranchTarget {
+    /// An unresolved reference to a Label, as passed into
+    /// `lower_branch_group()`.
+    Label(MachLabel),
+    /// A fixed PC offset.
+    ResolvedOffset(i32),
+}
+
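The 4-bit condition masks used by `Cond` above compose by OR-ing the architecture's condition-code bits (8 = equal/zero, 4 = low, 2 = high, 1 = overflow/unordered), so inversion is just a complement within the low four bits. A tiny runnable sketch, using the `from_floatcc` masks above as reference values:

```rust
// Same operation as Cond::invert in the patch, on a bare mask.
fn invert(mask: u8) -> u8 {
    !mask & 15
}

fn main() {
    // FloatCC::Equal (8) inverts to FloatCC::NotEqual (4 | 2 | 1).
    assert_eq!(invert(8), 4 | 2 | 1);
    // FloatCC::LessThan (4) inverts to UnorderedOrGreaterThanOrEqual (8 | 2 | 1).
    assert_eq!(invert(4), 8 | 2 | 1);
}
```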
+impl BranchTarget {
+    /// Return the target's label, if it is a label-based target.
+    pub fn as_label(self) -> Option<MachLabel> {
+        match self {
+            BranchTarget::Label(l) => Some(l),
+            _ => None,
+        }
+    }
+
+    /// Return the target's offset, if specified, or zero if label-based.
+    pub fn as_ri_offset_or_zero(self) -> u16 {
+        let off = match self {
+            BranchTarget::ResolvedOffset(off) => off >> 1,
+            _ => 0,
+        };
+        assert!(off <= 0x7fff);
+        assert!(off >= -0x8000);
+        off as u16
+    }
+
+    /// Return the target's offset, if specified, or zero if label-based.
+    pub fn as_ril_offset_or_zero(self) -> u32 {
+        let off = match self {
+            BranchTarget::ResolvedOffset(off) => off >> 1,
+            _ => 0,
+        };
+        off as u32
+    }
+}
+
+impl PrettyPrint for MemArg {
+    fn show_rru(&self, mb_rru: Option<&RealRegUniverse>) -> String {
+        match self {
+            &MemArg::BXD12 {
+                base, index, disp, ..
+            } => {
+                if base != zero_reg() {
+                    if index != zero_reg() {
+                        format!(
+                            "{}({},{})",
+                            disp.show_rru(mb_rru),
+                            index.show_rru(mb_rru),
+                            base.show_rru(mb_rru)
+                        )
+                    } else {
+                        format!("{}({})", disp.show_rru(mb_rru), base.show_rru(mb_rru))
+                    }
+                } else {
+                    if index != zero_reg() {
+                        format!("{}({},)", disp.show_rru(mb_rru), index.show_rru(mb_rru))
+                    } else {
+                        format!("{}", disp.show_rru(mb_rru))
+                    }
+                }
+            }
+            &MemArg::BXD20 {
+                base, index, disp, ..
+            } => {
+                if base != zero_reg() {
+                    if index != zero_reg() {
+                        format!(
+                            "{}({},{})",
+                            disp.show_rru(mb_rru),
+                            index.show_rru(mb_rru),
+                            base.show_rru(mb_rru)
+                        )
+                    } else {
+                        format!("{}({})", disp.show_rru(mb_rru), base.show_rru(mb_rru))
+                    }
+                } else {
+                    if index != zero_reg() {
+                        format!("{}({},)", disp.show_rru(mb_rru), index.show_rru(mb_rru))
+                    } else {
+                        format!("{}", disp.show_rru(mb_rru))
+                    }
+                }
+            }
+            &MemArg::Label { ref target } => target.show_rru(mb_rru),
+            &MemArg::Symbol {
+                ref name, offset, ..
+            } => format!("{} + {}", name, offset),
+            // Eliminated by `mem_finalize()`.
+            &MemArg::InitialSPOffset { .. }
+            | &MemArg::NominalSPOffset { .. }
+            | &MemArg::RegOffset { .. } => {
+                panic!("Unexpected pseudo mem-arg mode (stack-offset or generic reg-offset)!")
+            }
+        }
+    }
+}
+
+impl PrettyPrint for Cond {
+    fn show_rru(&self, _mb_rru: Option<&RealRegUniverse>) -> String {
+        let s = match self.mask {
+            1 => "o",
+            2 => "h",
+            3 => "nle",
+            4 => "l",
+            5 => "nhe",
+            6 => "lh",
+            7 => "ne",
+            8 => "e",
+            9 => "nlh",
+            10 => "he",
+            11 => "nl",
+            12 => "le",
+            13 => "nh",
+            14 => "no",
+            _ => unreachable!(),
+        };
+        s.to_string()
+    }
+}
+
+impl PrettyPrint for BranchTarget {
+    fn show_rru(&self, _mb_rru: Option<&RealRegUniverse>) -> String {
+        match self {
+            &BranchTarget::Label(label) => format!("label{:?}", label.get()),
+            &BranchTarget::ResolvedOffset(off) => format!("{}", off),
+        }
+    }
+}
diff --git a/cranelift/codegen/src/isa/s390x/inst/emit.rs b/cranelift/codegen/src/isa/s390x/inst/emit.rs
new file mode 100644
index 0000000000..da1574fdf3
--- /dev/null
+++ b/cranelift/codegen/src/isa/s390x/inst/emit.rs
@@ -0,0 +1,1965 @@
+//! S390x ISA: binary code emission.
+
+use crate::binemit::{Reloc, StackMap};
+use crate::ir::condcodes::IntCC;
+use crate::ir::MemFlags;
+use crate::ir::{SourceLoc, TrapCode};
+use crate::isa::s390x::inst::*;
+use core::convert::TryFrom;
+use log::debug;
+use regalloc::{Reg, RegClass};
+
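The encoding helpers later in this file all follow the same pattern: split a multi-nibble opcode, pack register nibbles, and append big-endian immediates. As a worked example (a hypothetical standalone copy of `enc_ri_a`, not the patch's own code), AGHI %r3, -16 with opcode 0xa7b should encode to `a7 3b ff f0`:

```rust
// Standalone re-statement of the RI-a layout: 8-bit opcode1, 4-bit r1,
// 4-bit opcode2, then a big-endian 16-bit immediate.
fn enc_ri_a(opcode: u16, r1: u8, i2: u16) -> [u8; 4] {
    let mut enc = [0u8; 4];
    enc[0] = ((opcode >> 4) & 0xff) as u8;
    enc[1] = (r1 & 0xf) << 4 | (opcode & 0xf) as u8;
    enc[2..].copy_from_slice(&i2.to_be_bytes());
    enc
}

fn main() {
    // AGHI %r3, -16 (opcode 0xa7b, as used by Inst::AluRSImm16 for ALUOp::Add64).
    assert_eq!(enc_ri_a(0xa7b, 3, (-16i16) as u16), [0xa7, 0x3b, 0xff, 0xf0]);
}
```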
+/// Memory addressing mode finalization: convert "special" modes (e.g.,
+/// generic arbitrary stack offset) into real addressing modes, possibly by
+/// emitting some helper instructions that come immediately before the use
+/// of this amode.
+pub fn mem_finalize(
+    mem: &MemArg,
+    state: &EmitState,
+    have_d12: bool,
+    have_d20: bool,
+    have_pcrel: bool,
+    have_index: bool,
+) -> (SmallVec<[Inst; 4]>, MemArg) {
+    let mut insts = SmallVec::new();
+
+    // Resolve virtual addressing modes.
+    let mem = match mem {
+        &MemArg::RegOffset { off, .. }
+        | &MemArg::InitialSPOffset { off }
+        | &MemArg::NominalSPOffset { off } => {
+            let base = match mem {
+                &MemArg::RegOffset { reg, .. } => reg,
+                &MemArg::InitialSPOffset { .. } | &MemArg::NominalSPOffset { .. } => stack_reg(),
+                _ => unreachable!(),
+            };
+            let adj = match mem {
+                &MemArg::InitialSPOffset { .. } => {
+                    state.initial_sp_offset + state.virtual_sp_offset
+                }
+                &MemArg::NominalSPOffset { .. } => state.virtual_sp_offset,
+                _ => 0,
+            };
+            let off = off + adj;
+
+            if let Some(disp) = UImm12::maybe_from_u64(off as u64) {
+                MemArg::BXD12 {
+                    base,
+                    index: zero_reg(),
+                    disp,
+                    flags: mem.get_flags(),
+                }
+            } else if let Some(disp) = SImm20::maybe_from_i64(off) {
+                MemArg::BXD20 {
+                    base,
+                    index: zero_reg(),
+                    disp,
+                    flags: mem.get_flags(),
+                }
+            } else {
+                let tmp = writable_spilltmp_reg();
+                assert!(base != tmp.to_reg());
+                insts.extend(Inst::load_constant64(tmp, off as u64));
+                MemArg::reg_plus_reg(base, tmp.to_reg(), mem.get_flags())
+            }
+        }
+        _ => mem.clone(),
+    };
+
+    // If this addressing mode cannot be handled by the instruction, use load-address.
+    let need_load_address = match &mem {
+        &MemArg::Label { .. } | &MemArg::Symbol { .. } if !have_pcrel => true,
+        &MemArg::BXD20 { .. } if !have_d20 => true,
+        &MemArg::BXD12 { index, .. } | &MemArg::BXD20 { index, .. } if !have_index => {
+            index != zero_reg()
+        }
+        _ => false,
+    };
+    let mem = if need_load_address {
+        let flags = mem.get_flags();
+        let tmp = writable_spilltmp_reg();
+        insts.push(Inst::LoadAddr { rd: tmp, mem });
+        MemArg::reg(tmp.to_reg(), flags)
+    } else {
+        mem
+    };
+
+    // Convert 12-bit displacement to 20-bit if required.
+    let mem = match &mem {
+        &MemArg::BXD12 {
+            base,
+            index,
+            disp,
+            flags,
+        } if !have_d12 => {
+            assert!(have_d20);
+            MemArg::BXD20 {
+                base,
+                index,
+                disp: SImm20::from_uimm12(disp),
+                flags,
+            }
+        }
+        _ => mem,
+    };
+
+    (insts, mem)
+}
+
+pub fn mem_emit(
+    rd: Reg,
+    mem: &MemArg,
+    opcode_rx: Option<u16>,
+    opcode_rxy: Option<u16>,
+    opcode_ril: Option<u16>,
+    add_trap: bool,
+    sink: &mut MachBuffer<Inst>,
+    emit_info: &EmitInfo,
+    state: &mut EmitState,
+) {
+    let (mem_insts, mem) = mem_finalize(
+        mem,
+        state,
+        opcode_rx.is_some(),
+        opcode_rxy.is_some(),
+        opcode_ril.is_some(),
+        true,
+    );
+    for inst in mem_insts.into_iter() {
+        inst.emit(sink, emit_info, state);
+    }
+
+    if add_trap && mem.can_trap() {
+        let srcloc = state.cur_srcloc();
+        if srcloc != SourceLoc::default() {
+            sink.add_trap(srcloc, TrapCode::HeapOutOfBounds);
+        }
+    }
+
+    match &mem {
+        &MemArg::BXD12 {
+            base, index, disp, ..
+        } => {
+            put(
+                sink,
+                &enc_rx(opcode_rx.unwrap(), rd, base, index, disp.bits()),
+            );
+        }
+        &MemArg::BXD20 {
+            base, index, disp, ..
+        } => {
+            put(
+                sink,
+                &enc_rxy(opcode_rxy.unwrap(), rd, base, index, disp.bits()),
+            );
+        }
+        &MemArg::Label { ref target } => {
+            if let Some(l) = target.as_label() {
+                sink.use_label_at_offset(sink.cur_offset(), l, LabelUse::BranchRIL);
+            }
+            put(
+                sink,
+                &enc_ril_b(opcode_ril.unwrap(), rd, target.as_ril_offset_or_zero()),
+            );
+        }
+        &MemArg::Symbol {
+            ref name, offset, ..
+        } => {
+            let reloc = Reloc::S390xPCRel32Dbl;
+            let srcloc = state.cur_srcloc();
+            put_with_reloc(
+                sink,
+                &enc_ril_b(opcode_ril.unwrap(), rd, 0),
+                2,
+                srcloc,
+                reloc,
+                name,
+                offset.into(),
+            );
+        }
+        _ => unreachable!(),
+    }
+}
+
+pub fn mem_imm8_emit(
+    imm: u8,
+    mem: &MemArg,
+    opcode_si: u16,
+    opcode_siy: u16,
+    add_trap: bool,
+    sink: &mut MachBuffer<Inst>,
+    emit_info: &EmitInfo,
+    state: &mut EmitState,
+) {
+    let (mem_insts, mem) = mem_finalize(mem, state, true, true, false, false);
+    for inst in mem_insts.into_iter() {
+        inst.emit(sink, emit_info, state);
+    }
+
+    if add_trap && mem.can_trap() {
+        let srcloc = state.cur_srcloc();
+        if srcloc != SourceLoc::default() {
+            sink.add_trap(srcloc, TrapCode::HeapOutOfBounds);
+        }
+    }
+
+    match &mem {
+        &MemArg::BXD12 {
+            base, index, disp, ..
+        } => {
+            assert!(index == zero_reg());
+            put(sink, &enc_si(opcode_si, base, disp.bits(), imm));
+        }
+        &MemArg::BXD20 {
+            base, index, disp, ..
+        } => {
+            assert!(index == zero_reg());
+            put(sink, &enc_siy(opcode_siy, base, disp.bits(), imm));
+        }
+        _ => unreachable!(),
+    }
+}
+
+pub fn mem_imm16_emit(
+    imm: i16,
+    mem: &MemArg,
+    opcode_sil: u16,
+    add_trap: bool,
+    sink: &mut MachBuffer<Inst>,
+    emit_info: &EmitInfo,
+    state: &mut EmitState,
+) {
+    let (mem_insts, mem) = mem_finalize(mem, state, true, false, false, false);
+    for inst in mem_insts.into_iter() {
+        inst.emit(sink, emit_info, state);
+    }
+
+    if add_trap && mem.can_trap() {
+        let srcloc = state.cur_srcloc();
+        if srcloc != SourceLoc::default() {
+            sink.add_trap(srcloc, TrapCode::HeapOutOfBounds);
+        }
+    }
+
+    match &mem {
+        &MemArg::BXD12 {
+            base, index, disp, ..
+        } => {
+            assert!(index == zero_reg());
+            put(sink, &enc_sil(opcode_sil, base, disp.bits(), imm));
+        }
+        _ => unreachable!(),
+    }
+}
+
+//=============================================================================
+// Instructions and subcomponents: emission
+
+fn machreg_to_gpr(m: Reg) -> u8 {
+    assert_eq!(m.get_class(), RegClass::I64);
+    u8::try_from(m.to_real_reg().get_hw_encoding()).unwrap()
+}
+
+fn machreg_to_fpr(m: Reg) -> u8 {
+    assert_eq!(m.get_class(), RegClass::F64);
+    u8::try_from(m.to_real_reg().get_hw_encoding()).unwrap()
+}
+
+fn machreg_to_gpr_or_fpr(m: Reg) -> u8 {
+    u8::try_from(m.to_real_reg().get_hw_encoding()).unwrap()
+}
+
+/// E-type instructions.
+///
+///   15
+///   opcode
+///        0
+///
+fn enc_e(opcode: u16) -> [u8; 2] {
+    let mut enc: [u8; 2] = [0; 2];
+    let opcode1 = ((opcode >> 8) & 0xff) as u8;
+    let opcode2 = (opcode & 0xff) as u8;
+
+    enc[0] = opcode1;
+    enc[1] = opcode2;
+    enc
+}
+
+/// RIa-type instructions.
+///
+///   31      23 19      15
+///   opcode1 r1 opcode2 i2
+///        24 20      16  0
+///
+fn enc_ri_a(opcode: u16, r1: Reg, i2: u16) -> [u8; 4] {
+    let mut enc: [u8; 4] = [0; 4];
+    let opcode1 = ((opcode >> 4) & 0xff) as u8;
+    let opcode2 = (opcode & 0xf) as u8;
+    let r1 = machreg_to_gpr(r1) & 0x0f;
+
+    enc[0] = opcode1;
+    enc[1] = r1 << 4 | opcode2;
+    enc[2..].copy_from_slice(&i2.to_be_bytes());
+    enc
+}
+
+/// RIb-type instructions.
+///
+///   31      23 19      15
+///   opcode1 r1 opcode2 ri2
+///        24 20      16   0
+///
+fn enc_ri_b(opcode: u16, r1: Reg, ri2: i32) -> [u8; 4] {
+    let mut enc: [u8; 4] = [0; 4];
+    let opcode1 = ((opcode >> 4) & 0xff) as u8;
+    let opcode2 = (opcode & 0xf) as u8;
+    let r1 = machreg_to_gpr(r1) & 0x0f;
+    let ri2 = ((ri2 >> 1) & 0xffff) as u16;
+
+    enc[0] = opcode1;
+    enc[1] = r1 << 4 | opcode2;
+    enc[2..].copy_from_slice(&ri2.to_be_bytes());
+    enc
+}
+
+/// RIc-type instructions.
+/// +/// 31 23 19 15 +/// opcode1 m1 opcode2 ri2 +/// 24 20 16 0 +/// +fn enc_ri_c(opcode: u16, m1: u8, ri2: i32) -> [u8; 4] { + let mut enc: [u8; 4] = [0; 4]; + let opcode1 = ((opcode >> 4) & 0xff) as u8; + let opcode2 = (opcode & 0xf) as u8; + let m1 = m1 & 0x0f; + let ri2 = ((ri2 >> 1) & 0xffff) as u16; + + enc[0] = opcode1; + enc[1] = m1 << 4 | opcode2; + enc[2..].copy_from_slice(&ri2.to_be_bytes()); + enc +} + +/// RIEa-type instructions. +/// +/// 47 39 35 31 15 11 7 +/// opcode1 r1 -- i2 m3 -- opcode2 +/// 40 36 32 16 12 8 0 +/// +fn enc_rie_a(opcode: u16, r1: Reg, i2: u16, m3: u8) -> [u8; 6] { + let mut enc: [u8; 6] = [0; 6]; + let opcode1 = ((opcode >> 8) & 0xff) as u8; + let opcode2 = (opcode & 0xff) as u8; + let r1 = machreg_to_gpr(r1) & 0x0f; + let m3 = m3 & 0x0f; + + enc[0] = opcode1; + enc[1] = r1 << 4; + enc[2..4].copy_from_slice(&i2.to_be_bytes()); + enc[4] = m3 << 4; + enc[5] = opcode2; + enc +} + +/// RIEd-type instructions. +/// +/// 47 39 35 31 15 7 +/// opcode1 r1 r3 i2 -- opcode2 +/// 40 36 32 16 8 0 +/// +fn enc_rie_d(opcode: u16, r1: Reg, r3: Reg, i2: u16) -> [u8; 6] { + let mut enc: [u8; 6] = [0; 6]; + let opcode1 = ((opcode >> 8) & 0xff) as u8; + let opcode2 = (opcode & 0xff) as u8; + let r1 = machreg_to_gpr(r1) & 0x0f; + let r3 = machreg_to_gpr(r3) & 0x0f; + + enc[0] = opcode1; + enc[1] = r1 << 4 | r3; + enc[2..4].copy_from_slice(&i2.to_be_bytes()); + enc[5] = opcode2; + enc +} + +/// RIEg-type instructions. +/// +/// 47 39 35 31 15 7 +/// opcode1 r1 m3 i2 -- opcode2 +/// 40 36 32 16 8 0 +/// +fn enc_rie_g(opcode: u16, r1: Reg, i2: u16, m3: u8) -> [u8; 6] { + let mut enc: [u8; 6] = [0; 6]; + let opcode1 = ((opcode >> 8) & 0xff) as u8; + let opcode2 = (opcode & 0xff) as u8; + let r1 = machreg_to_gpr(r1) & 0x0f; + let m3 = m3 & 0x0f; + + enc[0] = opcode1; + enc[1] = r1 << 4 | m3; + enc[2..4].copy_from_slice(&i2.to_be_bytes()); + enc[5] = opcode2; + enc +} + +/// RILa-type instructions. +/// +/// 47 39 35 31 +/// opcode1 r1 opcode2 i2 +/// 40 36 32 0 +/// +fn enc_ril_a(opcode: u16, r1: Reg, i2: u32) -> [u8; 6] { + let mut enc: [u8; 6] = [0; 6]; + let opcode1 = ((opcode >> 4) & 0xff) as u8; + let opcode2 = (opcode & 0xf) as u8; + let r1 = machreg_to_gpr(r1) & 0x0f; + + enc[0] = opcode1; + enc[1] = r1 << 4 | opcode2; + enc[2..].copy_from_slice(&i2.to_be_bytes()); + enc +} + +/// RILb-type instructions. +/// +/// 47 39 35 31 +/// opcode1 r1 opcode2 ri2 +/// 40 36 32 0 +/// +fn enc_ril_b(opcode: u16, r1: Reg, ri2: u32) -> [u8; 6] { + let mut enc: [u8; 6] = [0; 6]; + let opcode1 = ((opcode >> 4) & 0xff) as u8; + let opcode2 = (opcode & 0xf) as u8; + let r1 = machreg_to_gpr(r1) & 0x0f; + + enc[0] = opcode1; + enc[1] = r1 << 4 | opcode2; + enc[2..].copy_from_slice(&ri2.to_be_bytes()); + enc +} + +/// RILc-type instructions. +/// +/// 47 39 35 31 +/// opcode1 m1 opcode2 i2 +/// 40 36 32 0 +/// +fn enc_ril_c(opcode: u16, m1: u8, ri2: u32) -> [u8; 6] { + let mut enc: [u8; 6] = [0; 6]; + let opcode1 = ((opcode >> 4) & 0xff) as u8; + let opcode2 = (opcode & 0xf) as u8; + let m1 = m1 & 0x0f; + + enc[0] = opcode1; + enc[1] = m1 << 4 | opcode2; + enc[2..].copy_from_slice(&ri2.to_be_bytes()); + enc +} + +/// RR-type instructions. +/// +/// 15 7 3 +/// opcode r1 r2 +/// 8 4 0 +/// +fn enc_rr(opcode: u16, r1: Reg, r2: Reg) -> [u8; 2] { + let mut enc: [u8; 2] = [0; 2]; + let opcode = (opcode & 0xff) as u8; + let r1 = machreg_to_gpr_or_fpr(r1) & 0x0f; + let r2 = machreg_to_gpr_or_fpr(r2) & 0x0f; + + enc[0] = opcode; + enc[1] = r1 << 4 | r2; + enc +} + +/// RRD-type instructions. 
+/// +/// 31 15 11 7 3 +/// opcode r1 -- r3 r2 +/// 16 12 8 4 0 +/// +fn enc_rrd(opcode: u16, r1: Reg, r2: Reg, r3: Reg) -> [u8; 4] { + let mut enc: [u8; 4] = [0; 4]; + let opcode1 = ((opcode >> 8) & 0xff) as u8; + let opcode2 = (opcode & 0xff) as u8; + let r1 = machreg_to_fpr(r1) & 0x0f; + let r2 = machreg_to_fpr(r2) & 0x0f; + let r3 = machreg_to_fpr(r3) & 0x0f; + + enc[0] = opcode1; + enc[1] = opcode2; + enc[2] = r1 << 4; + enc[3] = r3 << 4 | r2; + enc +} + +/// RRE-type instructions. +/// +/// 31 15 7 3 +/// opcode -- r1 r2 +/// 16 8 4 0 +/// +fn enc_rre(opcode: u16, r1: Reg, r2: Reg) -> [u8; 4] { + let mut enc: [u8; 4] = [0; 4]; + let opcode1 = ((opcode >> 8) & 0xff) as u8; + let opcode2 = (opcode & 0xff) as u8; + let r1 = machreg_to_gpr_or_fpr(r1) & 0x0f; + let r2 = machreg_to_gpr_or_fpr(r2) & 0x0f; + + enc[0] = opcode1; + enc[1] = opcode2; + enc[3] = r1 << 4 | r2; + enc +} + +/// RRFa/b-type instructions. +/// +/// 31 15 11 7 3 +/// opcode r3 m4 r1 r2 +/// 16 12 8 4 0 +/// +fn enc_rrf_ab(opcode: u16, r1: Reg, r2: Reg, r3: Reg, m4: u8) -> [u8; 4] { + let mut enc: [u8; 4] = [0; 4]; + let opcode1 = ((opcode >> 8) & 0xff) as u8; + let opcode2 = (opcode & 0xff) as u8; + let r1 = machreg_to_gpr_or_fpr(r1) & 0x0f; + let r2 = machreg_to_gpr_or_fpr(r2) & 0x0f; + let r3 = machreg_to_gpr_or_fpr(r3) & 0x0f; + let m4 = m4 & 0x0f; + + enc[0] = opcode1; + enc[1] = opcode2; + enc[2] = r3 << 4 | m4; + enc[3] = r1 << 4 | r2; + enc +} + +/// RRFc/d/e-type instructions. +/// +/// 31 15 11 7 3 +/// opcode m3 m4 r1 r2 +/// 16 12 8 4 0 +/// +fn enc_rrf_cde(opcode: u16, r1: Reg, r2: Reg, m3: u8, m4: u8) -> [u8; 4] { + let mut enc: [u8; 4] = [0; 4]; + let opcode1 = ((opcode >> 8) & 0xff) as u8; + let opcode2 = (opcode & 0xff) as u8; + let r1 = machreg_to_gpr_or_fpr(r1) & 0x0f; + let r2 = machreg_to_gpr_or_fpr(r2) & 0x0f; + let m3 = m3 & 0x0f; + let m4 = m4 & 0x0f; + + enc[0] = opcode1; + enc[1] = opcode2; + enc[2] = m3 << 4 | m4; + enc[3] = r1 << 4 | r2; + enc +} + +/// RS-type instructions. +/// +/// 31 23 19 15 11 +/// opcode r1 r3 b2 d2 +/// 24 20 16 12 0 +/// +fn enc_rs(opcode: u16, r1: Reg, r3: Reg, b2: Reg, d2: u32) -> [u8; 4] { + let opcode = (opcode & 0xff) as u8; + let r1 = machreg_to_gpr_or_fpr(r1) & 0x0f; + let r3 = machreg_to_gpr_or_fpr(r3) & 0x0f; + let b2 = machreg_to_gpr(b2) & 0x0f; + let d2_lo = (d2 & 0xff) as u8; + let d2_hi = ((d2 >> 8) & 0x0f) as u8; + + let mut enc: [u8; 4] = [0; 4]; + enc[0] = opcode; + enc[1] = r1 << 4 | r3; + enc[2] = b2 << 4 | d2_hi; + enc[3] = d2_lo; + enc +} + +/// RSY-type instructions. +/// +/// 47 39 35 31 27 15 7 +/// opcode1 r1 r3 b2 dl2 dh2 opcode2 +/// 40 36 32 28 16 8 0 +/// +fn enc_rsy(opcode: u16, r1: Reg, r3: Reg, b2: Reg, d2: u32) -> [u8; 6] { + let opcode1 = ((opcode >> 8) & 0xff) as u8; + let opcode2 = (opcode & 0xff) as u8; + let r1 = machreg_to_gpr_or_fpr(r1) & 0x0f; + let r3 = machreg_to_gpr_or_fpr(r3) & 0x0f; + let b2 = machreg_to_gpr(b2) & 0x0f; + let dl2_lo = (d2 & 0xff) as u8; + let dl2_hi = ((d2 >> 8) & 0x0f) as u8; + let dh2 = ((d2 >> 12) & 0xff) as u8; + + let mut enc: [u8; 6] = [0; 6]; + enc[0] = opcode1; + enc[1] = r1 << 4 | r3; + enc[2] = b2 << 4 | dl2_hi; + enc[3] = dl2_lo; + enc[4] = dh2; + enc[5] = opcode2; + enc +} + +/// RX-type instructions. 
+/// +/// 31 23 19 15 11 +/// opcode r1 x2 b2 d2 +/// 24 20 16 12 0 +/// +fn enc_rx(opcode: u16, r1: Reg, b2: Reg, x2: Reg, d2: u32) -> [u8; 4] { + let opcode = (opcode & 0xff) as u8; + let r1 = machreg_to_gpr_or_fpr(r1) & 0x0f; + let b2 = machreg_to_gpr(b2) & 0x0f; + let x2 = machreg_to_gpr(x2) & 0x0f; + let d2_lo = (d2 & 0xff) as u8; + let d2_hi = ((d2 >> 8) & 0x0f) as u8; + + let mut enc: [u8; 4] = [0; 4]; + enc[0] = opcode; + enc[1] = r1 << 4 | x2; + enc[2] = b2 << 4 | d2_hi; + enc[3] = d2_lo; + enc +} + +/// RXY-type instructions. +/// +/// 47 39 35 31 27 15 7 +/// opcode1 r1 x2 b2 dl2 dh2 opcode2 +/// 40 36 32 28 16 8 0 +/// +fn enc_rxy(opcode: u16, r1: Reg, b2: Reg, x2: Reg, d2: u32) -> [u8; 6] { + let opcode1 = ((opcode >> 8) & 0xff) as u8; + let opcode2 = (opcode & 0xff) as u8; + let r1 = machreg_to_gpr_or_fpr(r1) & 0x0f; + let b2 = machreg_to_gpr(b2) & 0x0f; + let x2 = machreg_to_gpr(x2) & 0x0f; + let dl2_lo = (d2 & 0xff) as u8; + let dl2_hi = ((d2 >> 8) & 0x0f) as u8; + let dh2 = ((d2 >> 12) & 0xff) as u8; + + let mut enc: [u8; 6] = [0; 6]; + enc[0] = opcode1; + enc[1] = r1 << 4 | x2; + enc[2] = b2 << 4 | dl2_hi; + enc[3] = dl2_lo; + enc[4] = dh2; + enc[5] = opcode2; + enc +} + +/// SI-type instructions. +/// +/// 31 23 15 11 +/// opcode i2 b1 d1 +/// 24 16 12 0 +/// +fn enc_si(opcode: u16, b1: Reg, d1: u32, i2: u8) -> [u8; 4] { + let opcode = (opcode & 0xff) as u8; + let b1 = machreg_to_gpr(b1) & 0x0f; + let d1_lo = (d1 & 0xff) as u8; + let d1_hi = ((d1 >> 8) & 0x0f) as u8; + + let mut enc: [u8; 4] = [0; 4]; + enc[0] = opcode; + enc[1] = i2; + enc[2] = b1 << 4 | d1_hi; + enc[3] = d1_lo; + enc +} + +/// SIL-type instructions. +/// +/// 47 31 27 15 +/// opcode b1 d1 i2 +/// 32 28 16 0 +/// +fn enc_sil(opcode: u16, b1: Reg, d1: u32, i2: i16) -> [u8; 6] { + let opcode1 = ((opcode >> 8) & 0xff) as u8; + let opcode2 = (opcode & 0xff) as u8; + let b1 = machreg_to_gpr(b1) & 0x0f; + let d1_lo = (d1 & 0xff) as u8; + let d1_hi = ((d1 >> 8) & 0x0f) as u8; + + let mut enc: [u8; 6] = [0; 6]; + enc[0] = opcode1; + enc[1] = opcode2; + enc[2] = b1 << 4 | d1_hi; + enc[3] = d1_lo; + enc[4..].copy_from_slice(&i2.to_be_bytes()); + enc +} + +/// SIY-type instructions. +/// +/// 47 39 31 27 15 7 +/// opcode1 i2 b1 dl1 dh1 opcode2 +/// 40 32 28 16 8 0 +/// +fn enc_siy(opcode: u16, b1: Reg, d1: u32, i2: u8) -> [u8; 6] { + let opcode1 = ((opcode >> 8) & 0xff) as u8; + let opcode2 = (opcode & 0xff) as u8; + let b1 = machreg_to_gpr(b1) & 0x0f; + let dl1_lo = (d1 & 0xff) as u8; + let dl1_hi = ((d1 >> 8) & 0x0f) as u8; + let dh1 = ((d1 >> 12) & 0xff) as u8; + + let mut enc: [u8; 6] = [0; 6]; + enc[0] = opcode1; + enc[1] = i2; + enc[2] = b1 << 4 | dl1_hi; + enc[3] = dl1_lo; + enc[4] = dh1; + enc[5] = opcode2; + enc +} + +/// VRR-type instructions. +/// +/// 47 39 35 31 27 23 19 15 11 7 +/// opcode1 v1 v2 v3 - m6 m5 m4 rxb opcode2 +/// 40 36 32 28 24 20 16 12 8 0 +/// +fn enc_vrr(opcode: u16, v1: Reg, v2: Reg, v3: Reg, m4: u8, m5: u8, m6: u8) -> [u8; 6] { + let opcode1 = ((opcode >> 8) & 0xff) as u8; + let opcode2 = (opcode & 0xff) as u8; + let rxb = 0; // FIXME + let v1 = machreg_to_fpr(v1) & 0x0f; // FIXME + let v2 = machreg_to_fpr(v2) & 0x0f; // FIXME + let v3 = machreg_to_fpr(v3) & 0x0f; // FIXME + let m4 = m4 & 0x0f; + let m5 = m5 & 0x0f; + let m6 = m6 & 0x0f; + + let mut enc: [u8; 6] = [0; 6]; + enc[0] = opcode1; + enc[1] = v1 << 4 | v2; + enc[2] = v3 << 4; + enc[3] = m6 << 4 | m5; + enc[4] = m4 << 4 | rxb; + enc[5] = opcode2; + enc +} + +/// VRX-type instructions. 
+///
+///   47      39 35 31 27 15 11  7
+///   opcode1 v1 x2 b2 d2 m3 rxb opcode2
+///        40 36 32 28 16 12   8       0
+///
+fn enc_vrx(opcode: u16, v1: Reg, b2: Reg, x2: Reg, d2: u32, m3: u8) -> [u8; 6] {
+    let opcode1 = ((opcode >> 8) & 0xff) as u8;
+    let opcode2 = (opcode & 0xff) as u8;
+    let rxb = 0; // FIXME
+    let v1 = machreg_to_fpr(v1) & 0x0f; // FIXME
+    let b2 = machreg_to_gpr(b2) & 0x0f;
+    let x2 = machreg_to_gpr(x2) & 0x0f;
+    let d2_lo = (d2 & 0xff) as u8;
+    let d2_hi = ((d2 >> 8) & 0x0f) as u8;
+    let m3 = m3 & 0x0f;
+
+    let mut enc: [u8; 6] = [0; 6];
+    enc[0] = opcode1;
+    enc[1] = v1 << 4 | x2;
+    enc[2] = b2 << 4 | d2_hi;
+    enc[3] = d2_lo;
+    enc[4] = m3 << 4 | rxb;
+    enc[5] = opcode2;
+    enc
+}
+
+/// Emit encoding to sink.
+fn put(sink: &mut MachBuffer<Inst>, enc: &[u8]) {
+    for byte in enc {
+        sink.put1(*byte);
+    }
+}
+
+/// Emit encoding to sink, adding a trap on the last byte.
+fn put_with_trap(sink: &mut MachBuffer<Inst>, enc: &[u8], srcloc: SourceLoc, trap_code: TrapCode) {
+    let len = enc.len();
+    for i in 0..len - 1 {
+        sink.put1(enc[i]);
+    }
+    sink.add_trap(srcloc, trap_code);
+    sink.put1(enc[len - 1]);
+}
+
+/// Emit encoding to sink, adding a relocation at byte offset.
+fn put_with_reloc(
+    sink: &mut MachBuffer<Inst>,
+    enc: &[u8],
+    offset: usize,
+    ri2_srcloc: SourceLoc,
+    ri2_reloc: Reloc,
+    ri2_name: &ExternalName,
+    ri2_offset: i64,
+) {
+    let len = enc.len();
+    for i in 0..offset {
+        sink.put1(enc[i]);
+    }
+    sink.add_reloc(ri2_srcloc, ri2_reloc, ri2_name, ri2_offset + offset as i64);
+    for i in offset..len {
+        sink.put1(enc[i]);
+    }
+}
+
+/// State carried between emissions of a sequence of instructions.
+#[derive(Default, Clone, Debug)]
+pub struct EmitState {
+    pub(crate) initial_sp_offset: i64,
+    pub(crate) virtual_sp_offset: i64,
+    /// Safepoint stack map for upcoming instruction, as provided to `pre_safepoint()`.
+    stack_map: Option<StackMap>,
+    /// Current source-code location corresponding to instruction to be emitted.
+    cur_srcloc: SourceLoc,
+}
+
+impl MachInstEmitState<Inst> for EmitState {
+    fn new(abi: &dyn ABICallee<I = Inst>) -> Self {
+        EmitState {
+            virtual_sp_offset: 0,
+            initial_sp_offset: abi.frame_size() as i64,
+            stack_map: None,
+            cur_srcloc: SourceLoc::default(),
+        }
+    }
+
+    fn pre_safepoint(&mut self, stack_map: StackMap) {
+        self.stack_map = Some(stack_map);
+    }
+
+    fn pre_sourceloc(&mut self, srcloc: SourceLoc) {
+        self.cur_srcloc = srcloc;
+    }
+}
+
+impl EmitState {
+    fn take_stack_map(&mut self) -> Option<StackMap> {
+        self.stack_map.take()
+    }
+
+    fn clear_post_insn(&mut self) {
+        self.stack_map = None;
+    }
+
+    fn cur_srcloc(&self) -> SourceLoc {
+        self.cur_srcloc
+    }
+}
+
+/// Constant state used during function compilation.
+pub struct EmitInfo(settings::Flags);
+
+impl EmitInfo {
+    pub(crate) fn new(flags: settings::Flags) -> Self {
+        Self(flags)
+    }
+}
+
+impl MachInstEmitInfo for EmitInfo {
+    fn flags(&self) -> &settings::Flags {
+        &self.0
+    }
+}
+
+impl MachInstEmit for Inst {
+    type State = EmitState;
+    type Info = EmitInfo;
+
+    fn emit(&self, sink: &mut MachBuffer<Inst>, emit_info: &Self::Info, state: &mut EmitState) {
+        // N.B.: we *must* not exceed the "worst-case size" used to compute
+        // where to insert islands, except when islands are explicitly triggered
+        // (with an `EmitIsland`). We check this in debug builds. This is `mut`
+        // to allow disabling the check for `JTSequence`, which is always
+        // emitted following an `EmitIsland`.
+ let mut start_off = sink.cur_offset(); + + match self { + &Inst::AluRRR { alu_op, rd, rn, rm } => { + let (opcode, have_rr) = match alu_op { + ALUOp::Add32 => (0xb9f8, true), // ARK + ALUOp::Add64 => (0xb9e8, true), // AGRK + ALUOp::Sub32 => (0xb9f9, true), // SRK + ALUOp::Sub64 => (0xb9e9, true), // SGRK + ALUOp::Mul32 => (0xb9fd, true), // MSRKC + ALUOp::Mul64 => (0xb9ed, true), // MSGRKC + ALUOp::And32 => (0xb9f4, true), // NRK + ALUOp::And64 => (0xb9e4, true), // NGRK + ALUOp::Orr32 => (0xb9f6, true), // ORK + ALUOp::Orr64 => (0xb9e6, true), // OGRK + ALUOp::Xor32 => (0xb9f7, true), // XRK + ALUOp::Xor64 => (0xb9e7, true), // XGRK + ALUOp::AndNot32 => (0xb974, false), // NNRK + ALUOp::AndNot64 => (0xb964, false), // NNGRK + ALUOp::OrrNot32 => (0xb976, false), // NORK + ALUOp::OrrNot64 => (0xb966, false), // NOGRK + ALUOp::XorNot32 => (0xb977, false), // NXRK + ALUOp::XorNot64 => (0xb967, false), // NXGRK + _ => unreachable!(), + }; + if have_rr && rd.to_reg() == rn { + let inst = Inst::AluRR { alu_op, rd, rm }; + inst.emit(sink, emit_info, state); + } else { + put(sink, &enc_rrf_ab(opcode, rd.to_reg(), rn, rm, 0)); + } + } + &Inst::AluRRSImm16 { + alu_op, + rd, + rn, + imm, + } => { + if rd.to_reg() == rn { + let inst = Inst::AluRSImm16 { alu_op, rd, imm }; + inst.emit(sink, emit_info, state); + } else { + let opcode = match alu_op { + ALUOp::Add32 => 0xecd8, // AHIK + ALUOp::Add64 => 0xecd9, // AGHIK + _ => unreachable!(), + }; + put(sink, &enc_rie_d(opcode, rd.to_reg(), rn, imm as u16)); + } + } + &Inst::AluRR { alu_op, rd, rm } => { + let (opcode, is_rre) = match alu_op { + ALUOp::Add32 => (0x1a, false), // AR + ALUOp::Add64 => (0xb908, true), // AGR + ALUOp::Add64Ext32 => (0xb918, true), // AGFR + ALUOp::Sub32 => (0x1b, false), // SR + ALUOp::Sub64 => (0xb909, true), // SGR + ALUOp::Sub64Ext32 => (0xb919, true), // SGFR + ALUOp::Mul32 => (0xb252, true), // MSR + ALUOp::Mul64 => (0xb90c, true), // MSGR + ALUOp::Mul64Ext32 => (0xb91c, true), // MSGFR + ALUOp::And32 => (0x14, false), // NR + ALUOp::And64 => (0xb980, true), // NGR + ALUOp::Orr32 => (0x16, false), // OR + ALUOp::Orr64 => (0xb981, true), // OGR + ALUOp::Xor32 => (0x17, false), // XR + ALUOp::Xor64 => (0xb982, true), // XGR + _ => unreachable!(), + }; + if is_rre { + put(sink, &enc_rre(opcode, rd.to_reg(), rm)); + } else { + put(sink, &enc_rr(opcode, rd.to_reg(), rm)); + } + } + &Inst::AluRX { + alu_op, + rd, + ref mem, + } => { + let (opcode_rx, opcode_rxy) = match alu_op { + ALUOp::Add32 => (Some(0x5a), Some(0xe35a)), // A(Y) + ALUOp::Add32Ext16 => (Some(0x4a), Some(0xe34a)), // AH(Y) + ALUOp::Add64 => (None, Some(0xe308)), // AG + ALUOp::Add64Ext16 => (None, Some(0xe338)), // AGH + ALUOp::Add64Ext32 => (None, Some(0xe318)), // AGF + ALUOp::Sub32 => (Some(0x5b), Some(0xe35b)), // S(Y) + ALUOp::Sub32Ext16 => (Some(0x4b), Some(0xe37b)), // SH(Y) + ALUOp::Sub64 => (None, Some(0xe309)), // SG + ALUOp::Sub64Ext16 => (None, Some(0xe339)), // SGH + ALUOp::Sub64Ext32 => (None, Some(0xe319)), // SGF + ALUOp::Mul32 => (Some(0x71), Some(0xe351)), // MS(Y) + ALUOp::Mul32Ext16 => (Some(0x4c), Some(0xe37c)), // MH(Y) + ALUOp::Mul64 => (None, Some(0xe30c)), // MSG + ALUOp::Mul64Ext16 => (None, Some(0xe33c)), // MSH + ALUOp::Mul64Ext32 => (None, Some(0xe31c)), // MSGF + ALUOp::And32 => (Some(0x54), Some(0xe354)), // N(Y) + ALUOp::And64 => (None, Some(0xe380)), // NG + ALUOp::Orr32 => (Some(0x56), Some(0xe356)), // O(Y) + ALUOp::Orr64 => (None, Some(0xe381)), // OG + ALUOp::Xor32 => (Some(0x57), Some(0xe357)), // X(Y) + ALUOp::Xor64 
+                    _ => unreachable!(),
+                };
+                let rd = rd.to_reg();
+                mem_emit(
+                    rd, mem, opcode_rx, opcode_rxy, None, true, sink, emit_info, state,
+                );
+            }
+            &Inst::AluRSImm16 { alu_op, rd, imm } => {
+                let opcode = match alu_op {
+                    ALUOp::Add32 => 0xa7a, // AHI
+                    ALUOp::Add64 => 0xa7b, // AGHI
+                    ALUOp::Mul32 => 0xa7c, // MHI
+                    ALUOp::Mul64 => 0xa7d, // MGHI
+                    _ => unreachable!(),
+                };
+                put(sink, &enc_ri_a(opcode, rd.to_reg(), imm as u16));
+            }
+            &Inst::AluRSImm32 { alu_op, rd, imm } => {
+                let opcode = match alu_op {
+                    ALUOp::Add32 => 0xc29, // AFI
+                    ALUOp::Add64 => 0xc28, // AGFI
+                    ALUOp::Mul32 => 0xc21, // MSFI
+                    ALUOp::Mul64 => 0xc20, // MSGFI
+                    _ => unreachable!(),
+                };
+                put(sink, &enc_ril_a(opcode, rd.to_reg(), imm as u32));
+            }
+            &Inst::AluRUImm32 { alu_op, rd, imm } => {
+                let opcode = match alu_op {
+                    ALUOp::Add32 => 0xc2b, // ALFI
+                    ALUOp::Add64 => 0xc2a, // ALGFI
+                    ALUOp::Sub32 => 0xc25, // SLFI
+                    ALUOp::Sub64 => 0xc24, // SLGFI
+                    _ => unreachable!(),
+                };
+                put(sink, &enc_ril_a(opcode, rd.to_reg(), imm));
+            }
+            &Inst::AluRUImm16Shifted { alu_op, rd, imm } => {
+                let opcode = match (alu_op, imm.shift) {
+                    (ALUOp::And32, 0) => 0xa57, // NILL
+                    (ALUOp::And32, 1) => 0xa56, // NILH
+                    (ALUOp::And64, 0) => 0xa57, // NILL
+                    (ALUOp::And64, 1) => 0xa56, // NILH
+                    (ALUOp::And64, 2) => 0xa55, // NIHL
+                    (ALUOp::And64, 3) => 0xa54, // NIHH
+                    (ALUOp::Orr32, 0) => 0xa5b, // OILL
+                    (ALUOp::Orr32, 1) => 0xa5a, // OILH
+                    (ALUOp::Orr64, 0) => 0xa5b, // OILL
+                    (ALUOp::Orr64, 1) => 0xa5a, // OILH
+                    (ALUOp::Orr64, 2) => 0xa59, // OIHL
+                    (ALUOp::Orr64, 3) => 0xa58, // OIHH
+                    _ => unreachable!(),
+                };
+                put(sink, &enc_ri_a(opcode, rd.to_reg(), imm.bits));
+            }
+            &Inst::AluRUImm32Shifted { alu_op, rd, imm } => {
+                let opcode = match (alu_op, imm.shift) {
+                    (ALUOp::And32, 0) => 0xc0b, // NILF
+                    (ALUOp::And64, 0) => 0xc0b, // NILF
+                    (ALUOp::And64, 1) => 0xc0a, // NIHF
+                    (ALUOp::Orr32, 0) => 0xc0d, // OILF
+                    (ALUOp::Orr64, 0) => 0xc0d, // OILF
+                    (ALUOp::Orr64, 1) => 0xc0c, // OIHF
+                    (ALUOp::Xor32, 0) => 0xc07, // XILF
+                    (ALUOp::Xor64, 0) => 0xc07, // XILF
+                    (ALUOp::Xor64, 1) => 0xc06, // XIHF
+                    _ => unreachable!(),
+                };
+                put(sink, &enc_ril_a(opcode, rd.to_reg(), imm.bits));
+            }
+
+            &Inst::SMulWide { rn, rm } => {
+                let opcode = 0xb9ec; // MGRK
+                put(sink, &enc_rrf_ab(opcode, gpr(0), rn, rm, 0));
+            }
+            &Inst::UMulWide { rn } => {
+                let opcode = 0xb986; // MLGR
+                put(sink, &enc_rre(opcode, gpr(0), rn));
+            }
+            &Inst::SDivMod32 { rn } => {
+                let opcode = 0xb91d; // DSGFR
+                let srcloc = state.cur_srcloc();
+                let trap_code = TrapCode::IntegerDivisionByZero;
+                put_with_trap(sink, &enc_rre(opcode, gpr(0), rn), srcloc, trap_code);
+            }
+            &Inst::SDivMod64 { rn } => {
+                let opcode = 0xb90d; // DSGR
+                let srcloc = state.cur_srcloc();
+                let trap_code = TrapCode::IntegerDivisionByZero;
+                put_with_trap(sink, &enc_rre(opcode, gpr(0), rn), srcloc, trap_code);
+            }
+            &Inst::UDivMod32 { rn } => {
+                let opcode = 0xb997; // DLR
+                let srcloc = state.cur_srcloc();
+                let trap_code = TrapCode::IntegerDivisionByZero;
+                put_with_trap(sink, &enc_rre(opcode, gpr(0), rn), srcloc, trap_code);
+            }
+            &Inst::UDivMod64 { rn } => {
+                let opcode = 0xb987; // DLGR
+                let srcloc = state.cur_srcloc();
+                let trap_code = TrapCode::IntegerDivisionByZero;
+                put_with_trap(sink, &enc_rre(opcode, gpr(0), rn), srcloc, trap_code);
+            }
+            &Inst::Flogr { rn } => {
+                let opcode = 0xb983; // FLOGR
+                put(sink, &enc_rre(opcode, gpr(0), rn));
+            }
+
+            &Inst::ShiftRR {
+                shift_op,
+                rd,
+                rn,
+                shift_imm,
+                shift_reg,
+            } => {
+                let opcode = match
shift_op { + ShiftOp::RotL32 => 0xeb1d, // RLL + ShiftOp::RotL64 => 0xeb1c, // RLLG + ShiftOp::LShL32 => 0xebdf, // SLLK (SLL ?) + ShiftOp::LShL64 => 0xeb0d, // SLLG + ShiftOp::LShR32 => 0xebde, // SRLK (SRL ?) + ShiftOp::LShR64 => 0xeb0c, // SRLG + ShiftOp::AShR32 => 0xebdc, // SRAK (SRA ?) + ShiftOp::AShR64 => 0xeb0a, // SRAG + }; + let shift_reg = match shift_reg { + Some(reg) => reg, + None => zero_reg(), + }; + put( + sink, + &enc_rsy(opcode, rd.to_reg(), rn, shift_reg, shift_imm.bits()), + ); + } + + &Inst::UnaryRR { op, rd, rn } => { + match op { + UnaryOp::Abs32 => { + let opcode = 0x10; // LPR + put(sink, &enc_rr(opcode, rd.to_reg(), rn)); + } + UnaryOp::Abs64 => { + let opcode = 0xb900; // LPGR + put(sink, &enc_rre(opcode, rd.to_reg(), rn)); + } + UnaryOp::Abs64Ext32 => { + let opcode = 0xb910; // LPGFR + put(sink, &enc_rre(opcode, rd.to_reg(), rn)); + } + UnaryOp::Neg32 => { + let opcode = 0x13; // LCR + put(sink, &enc_rr(opcode, rd.to_reg(), rn)); + } + UnaryOp::Neg64 => { + let opcode = 0xb903; // LCGR + put(sink, &enc_rre(opcode, rd.to_reg(), rn)); + } + UnaryOp::Neg64Ext32 => { + let opcode = 0xb913; // LCGFR + put(sink, &enc_rre(opcode, rd.to_reg(), rn)); + } + UnaryOp::PopcntByte => { + let opcode = 0xb9e1; // POPCNT + put(sink, &enc_rrf_cde(opcode, rd.to_reg(), rn, 0, 0)); + } + UnaryOp::PopcntReg => { + let opcode = 0xb9e1; // POPCNT + put(sink, &enc_rrf_cde(opcode, rd.to_reg(), rn, 8, 0)); + } + } + } + + &Inst::Extend { + rd, + rn, + signed, + from_bits, + to_bits, + } => { + let opcode = match (signed, from_bits, to_bits) { + (_, 1, 32) => 0xb926, // LBR + (_, 1, 64) => 0xb906, // LGBR + (false, 8, 32) => 0xb994, // LLCR + (false, 8, 64) => 0xb984, // LLGCR + (true, 8, 32) => 0xb926, // LBR + (true, 8, 64) => 0xb906, // LGBR + (false, 16, 32) => 0xb995, // LLHR + (false, 16, 64) => 0xb985, // LLGHR + (true, 16, 32) => 0xb927, // LHR + (true, 16, 64) => 0xb907, // LGHR + (false, 32, 64) => 0xb916, // LLGFR + (true, 32, 64) => 0xb914, // LGFR + _ => panic!( + "Unsupported extend combination: signed = {}, from_bits = {}, to_bits = {}", + signed, from_bits, to_bits + ), + }; + put(sink, &enc_rre(opcode, rd.to_reg(), rn)); + } + + &Inst::CmpRR { op, rn, rm } => { + let (opcode, is_rre) = match op { + CmpOp::CmpS32 => (0x19, false), // CR + CmpOp::CmpS64 => (0xb920, true), // CGR + CmpOp::CmpS64Ext32 => (0xb930, true), // CGFR + CmpOp::CmpL32 => (0x15, false), // CLR + CmpOp::CmpL64 => (0xb921, true), // CLGR + CmpOp::CmpL64Ext32 => (0xb931, true), // CLGFR + _ => unreachable!(), + }; + if is_rre { + put(sink, &enc_rre(opcode, rn, rm)); + } else { + put(sink, &enc_rr(opcode, rn, rm)); + } + } + &Inst::CmpRX { op, rn, ref mem } => { + let (opcode_rx, opcode_rxy, opcode_ril) = match op { + CmpOp::CmpS32 => (Some(0x59), Some(0xe359), Some(0xc6d)), // C(Y), CRL + CmpOp::CmpS32Ext16 => (Some(0x49), Some(0xe379), Some(0xc65)), // CH(Y), CHRL + CmpOp::CmpS64 => (None, Some(0xe320), Some(0xc68)), // CG, CGRL + CmpOp::CmpS64Ext16 => (None, Some(0xe334), Some(0xc64)), // CGH, CGHRL + CmpOp::CmpS64Ext32 => (None, Some(0xe330), Some(0xc6c)), // CGF, CGFRL + CmpOp::CmpL32 => (Some(0x55), Some(0xe355), Some(0xc6f)), // CL(Y), CLRL + CmpOp::CmpL32Ext16 => (None, None, Some(0xc67)), // CLHRL + CmpOp::CmpL64 => (None, Some(0xe321), Some(0xc6a)), // CLG, CLGRL + CmpOp::CmpL64Ext16 => (None, None, Some(0xc66)), // CLGHRL + CmpOp::CmpL64Ext32 => (None, Some(0xe331), Some(0xc6e)), // CLGF, CLGFRL + }; + mem_emit( + rn, mem, opcode_rx, opcode_rxy, opcode_ril, true, sink, emit_info, state, + ); + 
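+                // (The three opcodes correspond to the three addressing forms
+                // of `mem`: RX for a 12-bit unsigned displacement, RXY for a
+                // 20-bit signed displacement, and the PC-relative RIL form for
+                // `MemArg::Label`; `mem_emit` is assumed to pick whichever
+                // form the finalized address allows.)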
} + &Inst::CmpRSImm16 { op, rn, imm } => { + let opcode = match op { + CmpOp::CmpS32 => 0xa7e, // CHI + CmpOp::CmpS64 => 0xa7f, // CGHI + _ => unreachable!(), + }; + put(sink, &enc_ri_a(opcode, rn, imm as u16)); + } + &Inst::CmpRSImm32 { op, rn, imm } => { + let opcode = match op { + CmpOp::CmpS32 => 0xc2d, // CFI + CmpOp::CmpS64 => 0xc2c, // CGFI + _ => unreachable!(), + }; + put(sink, &enc_ril_a(opcode, rn, imm as u32)); + } + &Inst::CmpRUImm32 { op, rn, imm } => { + let opcode = match op { + CmpOp::CmpL32 => 0xc2f, // CLFI + CmpOp::CmpL64 => 0xc2e, // CLGFI + _ => unreachable!(), + }; + put(sink, &enc_ril_a(opcode, rn, imm)); + } + &Inst::CmpTrapRR { + op, + rn, + rm, + cond, + trap_code, + } => { + let opcode = match op { + CmpOp::CmpS32 => 0xb972, // CRT + CmpOp::CmpS64 => 0xb960, // CGRT + CmpOp::CmpL32 => 0xb973, // CLRT + CmpOp::CmpL64 => 0xb961, // CLGRT + _ => unreachable!(), + }; + let srcloc = state.cur_srcloc(); + put_with_trap( + sink, + &enc_rrf_cde(opcode, rn, rm, cond.bits(), 0), + srcloc, + trap_code, + ); + } + &Inst::CmpTrapRSImm16 { + op, + rn, + imm, + cond, + trap_code, + } => { + let opcode = match op { + CmpOp::CmpS32 => 0xec72, // CIT + CmpOp::CmpS64 => 0xec70, // CGIT + _ => unreachable!(), + }; + let srcloc = state.cur_srcloc(); + put_with_trap( + sink, + &enc_rie_a(opcode, rn, imm as u16, cond.bits()), + srcloc, + trap_code, + ); + } + &Inst::CmpTrapRUImm16 { + op, + rn, + imm, + cond, + trap_code, + } => { + let opcode = match op { + CmpOp::CmpL32 => 0xec73, // CLFIT + CmpOp::CmpL64 => 0xec71, // CLGIT + _ => unreachable!(), + }; + let srcloc = state.cur_srcloc(); + put_with_trap( + sink, + &enc_rie_a(opcode, rn, imm, cond.bits()), + srcloc, + trap_code, + ); + } + + &Inst::Load32 { rd, ref mem } + | &Inst::Load32ZExt8 { rd, ref mem } + | &Inst::Load32SExt8 { rd, ref mem } + | &Inst::Load32ZExt16 { rd, ref mem } + | &Inst::Load32SExt16 { rd, ref mem } + | &Inst::Load64 { rd, ref mem } + | &Inst::Load64ZExt8 { rd, ref mem } + | &Inst::Load64SExt8 { rd, ref mem } + | &Inst::Load64ZExt16 { rd, ref mem } + | &Inst::Load64SExt16 { rd, ref mem } + | &Inst::Load64ZExt32 { rd, ref mem } + | &Inst::Load64SExt32 { rd, ref mem } + | &Inst::LoadRev16 { rd, ref mem } + | &Inst::LoadRev32 { rd, ref mem } + | &Inst::LoadRev64 { rd, ref mem } + | &Inst::FpuLoad32 { rd, ref mem } + | &Inst::FpuLoad64 { rd, ref mem } => { + let (opcode_rx, opcode_rxy, opcode_ril) = match self { + &Inst::Load32 { .. } => (Some(0x58), Some(0xe358), Some(0xc4d)), // L(Y), LRL + &Inst::Load32ZExt8 { .. } => (None, Some(0xe394), None), // LLC + &Inst::Load32SExt8 { .. } => (None, Some(0xe376), None), // LB + &Inst::Load32ZExt16 { .. } => (None, Some(0xe395), Some(0xc42)), // LLH, LLHRL + &Inst::Load32SExt16 { .. } => (Some(0x48), Some(0xe378), Some(0xc45)), // LH(Y), LHRL + &Inst::Load64 { .. } => (None, Some(0xe304), Some(0xc48)), // LG, LGRL + &Inst::Load64ZExt8 { .. } => (None, Some(0xe390), None), // LLGC + &Inst::Load64SExt8 { .. } => (None, Some(0xe377), None), // LGB + &Inst::Load64ZExt16 { .. } => (None, Some(0xe391), Some(0xc46)), // LLGH, LLGHRL + &Inst::Load64SExt16 { .. } => (None, Some(0xe315), Some(0xc44)), // LGH, LGHRL + &Inst::Load64ZExt32 { .. } => (None, Some(0xe316), Some(0xc4e)), // LLGF, LLGFRL + &Inst::Load64SExt32 { .. } => (None, Some(0xe314), Some(0xc4c)), // LGF, LGFRL + &Inst::LoadRev16 { .. } => (None, Some(0xe31f), None), // LRVH + &Inst::LoadRev32 { .. } => (None, Some(0xe31e), None), // LRV + &Inst::LoadRev64 { .. 
} => (None, Some(0xe30f), None), // LRVG + &Inst::FpuLoad32 { .. } => (Some(0x78), Some(0xed64), None), // LE(Y) + &Inst::FpuLoad64 { .. } => (Some(0x68), Some(0xed65), None), // LD(Y) + _ => unreachable!(), + }; + let rd = rd.to_reg(); + mem_emit( + rd, mem, opcode_rx, opcode_rxy, opcode_ril, true, sink, emit_info, state, + ); + } + &Inst::FpuLoadRev32 { rd, ref mem } | &Inst::FpuLoadRev64 { rd, ref mem } => { + let opcode = match self { + &Inst::FpuLoadRev32 { .. } => 0xe603, // VLEBRF + &Inst::FpuLoadRev64 { .. } => 0xe602, // VLEBRG + _ => unreachable!(), + }; + + let (mem_insts, mem) = mem_finalize(mem, state, true, false, false, true); + for inst in mem_insts.into_iter() { + inst.emit(sink, emit_info, state); + } + + let srcloc = state.cur_srcloc(); + if srcloc != SourceLoc::default() && mem.can_trap() { + sink.add_trap(srcloc, TrapCode::HeapOutOfBounds); + } + + match &mem { + &MemArg::BXD12 { + base, index, disp, .. + } => { + put( + sink, + &enc_vrx(opcode, rd.to_reg(), base, index, disp.bits(), 0), + ); + } + _ => unreachable!(), + } + } + + &Inst::Store8 { rd, ref mem } + | &Inst::Store16 { rd, ref mem } + | &Inst::Store32 { rd, ref mem } + | &Inst::Store64 { rd, ref mem } + | &Inst::StoreRev16 { rd, ref mem } + | &Inst::StoreRev32 { rd, ref mem } + | &Inst::StoreRev64 { rd, ref mem } + | &Inst::FpuStore32 { rd, ref mem } + | &Inst::FpuStore64 { rd, ref mem } => { + let (opcode_rx, opcode_rxy, opcode_ril) = match self { + &Inst::Store8 { .. } => (Some(0x42), Some(0xe372), None), // STC(Y) + &Inst::Store16 { .. } => (Some(0x40), Some(0xe370), Some(0xc47)), // STH(Y), STHRL + &Inst::Store32 { .. } => (Some(0x50), Some(0xe350), Some(0xc4f)), // ST(Y), STRL + &Inst::Store64 { .. } => (None, Some(0xe324), Some(0xc4b)), // STG, STGRL + &Inst::StoreRev16 { .. } => (None, Some(0xe33f), None), // STRVH + &Inst::StoreRev32 { .. } => (None, Some(0xe33e), None), // STRV + &Inst::StoreRev64 { .. } => (None, Some(0xe32f), None), // STRVG + &Inst::FpuStore32 { .. } => (Some(0x70), Some(0xed66), None), // STE(Y) + &Inst::FpuStore64 { .. } => (Some(0x60), Some(0xed67), None), // STD(Y) + _ => unreachable!(), + }; + mem_emit( + rd, mem, opcode_rx, opcode_rxy, opcode_ril, true, sink, emit_info, state, + ); + } + &Inst::StoreImm8 { imm, ref mem } => { + let opcode_si = 0x92; // MVI + let opcode_siy = 0xeb52; // MVIY + mem_imm8_emit( + imm, mem, opcode_si, opcode_siy, true, sink, emit_info, state, + ); + } + &Inst::StoreImm16 { imm, ref mem } + | &Inst::StoreImm32SExt16 { imm, ref mem } + | &Inst::StoreImm64SExt16 { imm, ref mem } => { + let opcode = match self { + &Inst::StoreImm16 { .. } => 0xe544, // MVHHI + &Inst::StoreImm32SExt16 { .. } => 0xe54c, // MVHI + &Inst::StoreImm64SExt16 { .. } => 0xe548, // MVGHI + _ => unreachable!(), + }; + mem_imm16_emit(imm, mem, opcode, true, sink, emit_info, state); + } + &Inst::FpuStoreRev32 { rd, ref mem } | &Inst::FpuStoreRev64 { rd, ref mem } => { + let opcode = match self { + &Inst::FpuStoreRev32 { .. } => 0xe60b, // VSTEBRF + &Inst::FpuStoreRev64 { .. } => 0xe60a, // VSTEBRG + _ => unreachable!(), + }; + + let (mem_insts, mem) = mem_finalize(mem, state, true, false, false, true); + for inst in mem_insts.into_iter() { + inst.emit(sink, emit_info, state); + } + + let srcloc = state.cur_srcloc(); + if srcloc != SourceLoc::default() && mem.can_trap() { + sink.add_trap(srcloc, TrapCode::HeapOutOfBounds); + } + + match &mem { + &MemArg::BXD12 { + base, index, disp, .. 
+ } => { + put(sink, &enc_vrx(opcode, rd, base, index, disp.bits(), 0)); + } + _ => unreachable!(), + } + } + + &Inst::LoadMultiple64 { + rt, + rt2, + addr_reg, + addr_off, + } => { + let opcode = 0xeb04; // LMG + let rt = rt.to_reg(); + let rt2 = rt2.to_reg(); + put(sink, &enc_rsy(opcode, rt, rt2, addr_reg, addr_off.bits())); + } + &Inst::StoreMultiple64 { + rt, + rt2, + addr_reg, + addr_off, + } => { + let opcode = 0xeb24; // STMG + put(sink, &enc_rsy(opcode, rt, rt2, addr_reg, addr_off.bits())); + } + + &Inst::LoadAddr { rd, ref mem } => { + let opcode_rx = Some(0x41); // LA + let opcode_rxy = Some(0xe371); // LAY + let opcode_ril = Some(0xc00); // LARL + let rd = rd.to_reg(); + mem_emit( + rd, mem, opcode_rx, opcode_rxy, opcode_ril, false, sink, emit_info, state, + ); + } + + &Inst::Mov64 { rd, rm } => { + let opcode = 0xb904; // LGR + put(sink, &enc_rre(opcode, rd.to_reg(), rm)); + } + &Inst::Mov32 { rd, rm } => { + let opcode = 0x18; // LR + put(sink, &enc_rr(opcode, rd.to_reg(), rm)); + } + &Inst::Mov32Imm { rd, imm } => { + let opcode = 0xc09; // IILF + put(sink, &enc_ril_a(opcode, rd.to_reg(), imm)); + } + &Inst::Mov32SImm16 { rd, imm } => { + let opcode = 0xa78; // LHI + put(sink, &enc_ri_a(opcode, rd.to_reg(), imm as u16)); + } + &Inst::Mov64SImm16 { rd, imm } => { + let opcode = 0xa79; // LGHI + put(sink, &enc_ri_a(opcode, rd.to_reg(), imm as u16)); + } + &Inst::Mov64SImm32 { rd, imm } => { + let opcode = 0xc01; // LGFI + put(sink, &enc_ril_a(opcode, rd.to_reg(), imm as u32)); + } + &Inst::CMov32 { rd, cond, rm } => { + let opcode = 0xb9f2; // LOCR + put(sink, &enc_rrf_cde(opcode, rd.to_reg(), rm, cond.bits(), 0)); + } + &Inst::CMov64 { rd, cond, rm } => { + let opcode = 0xb9e2; // LOCGR + put(sink, &enc_rrf_cde(opcode, rd.to_reg(), rm, cond.bits(), 0)); + } + &Inst::CMov32SImm16 { rd, cond, imm } => { + let opcode = 0xec42; // LOCHI + put( + sink, + &enc_rie_g(opcode, rd.to_reg(), imm as u16, cond.bits()), + ); + } + &Inst::CMov64SImm16 { rd, cond, imm } => { + let opcode = 0xec46; // LOCGHI + put( + sink, + &enc_rie_g(opcode, rd.to_reg(), imm as u16, cond.bits()), + ); + } + &Inst::Mov64UImm16Shifted { rd, imm } => { + let opcode = match imm.shift { + 0 => 0xa5f, // LLILL + 1 => 0xa5e, // LLILH + 2 => 0xa5d, // LLIHL + 3 => 0xa5c, // LLIHH + _ => unreachable!(), + }; + put(sink, &enc_ri_a(opcode, rd.to_reg(), imm.bits)); + } + &Inst::Mov64UImm32Shifted { rd, imm } => { + let opcode = match imm.shift { + 0 => 0xc0f, // LLILF + 1 => 0xc0e, // LLIHF + _ => unreachable!(), + }; + put(sink, &enc_ril_a(opcode, rd.to_reg(), imm.bits)); + } + &Inst::Insert64UImm16Shifted { rd, imm } => { + let opcode = match imm.shift { + 0 => 0xa53, // IILL + 1 => 0xa52, // IILH + 2 => 0xa51, // IIHL + 3 => 0xa50, // IIHH + _ => unreachable!(), + }; + put(sink, &enc_ri_a(opcode, rd.to_reg(), imm.bits)); + } + &Inst::Insert64UImm32Shifted { rd, imm } => { + let opcode = match imm.shift { + 0 => 0xc09, // IILF + 1 => 0xc08, // IIHF + _ => unreachable!(), + }; + put(sink, &enc_ril_a(opcode, rd.to_reg(), imm.bits)); + } + &Inst::LoadExtNameFar { + rd, + ref name, + offset, + } => { + let opcode = 0xa75; // BRAS + let srcloc = state.cur_srcloc(); + let reg = writable_spilltmp_reg().to_reg(); + put(sink, &enc_ri_b(opcode, reg, 12)); + sink.add_reloc(srcloc, Reloc::Abs8, name, offset); + if emit_info.flags().emit_all_ones_funcaddrs() { + sink.put8(u64::max_value()); + } else { + sink.put8(0); + } + let inst = Inst::Load64 { + rd, + mem: MemArg::reg(reg, MemFlags::trusted()), + }; + inst.emit(sink, 
emit_info, state);
+            }
+
+            &Inst::FpuMove32 { rd, rn } => {
+                let opcode = 0x38; // LER
+                put(sink, &enc_rr(opcode, rd.to_reg(), rn));
+            }
+            &Inst::FpuMove64 { rd, rn } => {
+                let opcode = 0x28; // LDR
+                put(sink, &enc_rr(opcode, rd.to_reg(), rn));
+            }
+            &Inst::FpuCMov32 { rd, cond, rm } => {
+                let opcode = 0xa74; // BRC
+                put(sink, &enc_ri_c(opcode, cond.invert().bits(), 4 + 2));
+                let opcode = 0x38; // LER
+                put(sink, &enc_rr(opcode, rd.to_reg(), rm));
+            }
+            &Inst::FpuCMov64 { rd, cond, rm } => {
+                let opcode = 0xa74; // BRC
+                put(sink, &enc_ri_c(opcode, cond.invert().bits(), 4 + 2));
+                let opcode = 0x28; // LDR
+                put(sink, &enc_rr(opcode, rd.to_reg(), rm));
+            }
+            &Inst::MovToFpr { rd, rn } => {
+                let opcode = 0xb3c1; // LDGR
+                put(sink, &enc_rre(opcode, rd.to_reg(), rn));
+            }
+            &Inst::MovFromFpr { rd, rn } => {
+                let opcode = 0xb3cd; // LGDR
+                put(sink, &enc_rre(opcode, rd.to_reg(), rn));
+            }
+            &Inst::LoadFpuConst32 { rd, const_data } => {
+                let opcode = 0xa75; // BRAS
+                let reg = writable_spilltmp_reg().to_reg();
+                put(sink, &enc_ri_b(opcode, reg, 8));
+                sink.put4(const_data.to_bits().swap_bytes());
+                let inst = Inst::FpuLoad32 {
+                    rd,
+                    mem: MemArg::reg(reg, MemFlags::trusted()),
+                };
+                inst.emit(sink, emit_info, state);
+            }
+            &Inst::LoadFpuConst64 { rd, const_data } => {
+                let opcode = 0xa75; // BRAS
+                let reg = writable_spilltmp_reg().to_reg();
+                put(sink, &enc_ri_b(opcode, reg, 12));
+                sink.put8(const_data.to_bits().swap_bytes());
+                let inst = Inst::FpuLoad64 {
+                    rd,
+                    mem: MemArg::reg(reg, MemFlags::trusted()),
+                };
+                inst.emit(sink, emit_info, state);
+            }
+
+            &Inst::FpuCopysign { rd, rn, rm } => {
+                let opcode = 0xb372; // CPSDR
+                put(sink, &enc_rrf_ab(opcode, rd.to_reg(), rn, rm, 0));
+            }
+            &Inst::FpuRR { fpu_op, rd, rn } => {
+                let opcode = match fpu_op {
+                    FPUOp1::Abs32 => 0xb300, // LPEBR
+                    FPUOp1::Abs64 => 0xb310, // LPDBR
+                    FPUOp1::Neg32 => 0xb303, // LCEBR
+                    FPUOp1::Neg64 => 0xb313, // LCDBR
+                    FPUOp1::NegAbs32 => 0xb301, // LNEBR
+                    FPUOp1::NegAbs64 => 0xb311, // LNDBR
+                    FPUOp1::Sqrt32 => 0xb314, // SQEBR
+                    FPUOp1::Sqrt64 => 0xb315, // SQDBR
+                    FPUOp1::Cvt32To64 => 0xb304, // LDEBR
+                    FPUOp1::Cvt64To32 => 0xb344, // LEDBR
+                };
+                put(sink, &enc_rre(opcode, rd.to_reg(), rn));
+            }
+            &Inst::FpuRRR { fpu_op, rd, rm } => {
+                let opcode = match fpu_op {
+                    FPUOp2::Add32 => 0xb30a, // AEBR
+                    FPUOp2::Add64 => 0xb31a, // ADBR
+                    FPUOp2::Sub32 => 0xb30b, // SEBR
+                    FPUOp2::Sub64 => 0xb31b, // SDBR
+                    FPUOp2::Mul32 => 0xb317, // MEEBR
+                    FPUOp2::Mul64 => 0xb31c, // MDBR
+                    FPUOp2::Div32 => 0xb30d, // DEBR
+                    FPUOp2::Div64 => 0xb31d, // DDBR
+                    _ => unimplemented!(),
+                };
+                put(sink, &enc_rre(opcode, rd.to_reg(), rm));
+            }
+            &Inst::FpuRRRR { fpu_op, rd, rn, rm } => {
+                let opcode = match fpu_op {
+                    FPUOp3::MAdd32 => 0xb30e, // MAEBR
+                    FPUOp3::MAdd64 => 0xb31e, // MADBR
+                    FPUOp3::MSub32 => 0xb30f, // MSEBR
+                    FPUOp3::MSub64 => 0xb31f, // MSDBR
+                };
+                put(sink, &enc_rrd(opcode, rd.to_reg(), rm, rn));
+            }
+            &Inst::FpuToInt { op, rd, rn } => {
+                let opcode = match op {
+                    FpuToIntOp::F32ToI32 => 0xb398, // CFEBRA
+                    FpuToIntOp::F32ToU32 => 0xb39c, // CLFEBR
+                    FpuToIntOp::F32ToI64 => 0xb3a8, // CGEBRA
+                    FpuToIntOp::F32ToU64 => 0xb3ac, // CLGEBR
+                    FpuToIntOp::F64ToI32 => 0xb399, // CFDBRA
+                    FpuToIntOp::F64ToU32 => 0xb39d, // CLFDBR
+                    FpuToIntOp::F64ToI64 => 0xb3a9, // CGDBRA
+                    FpuToIntOp::F64ToU64 => 0xb3ad, // CLGDBR
+                };
+                put(sink, &enc_rrf_cde(opcode, rd.to_reg(), rn, 5, 0));
+            }
+            &Inst::IntToFpu { op, rd, rn } => {
+                let opcode = match op {
+                    IntToFpuOp::I32ToF32 => 0xb394, // CEFBRA
+                    IntToFpuOp::U32ToF32 => 0xb390, // CELFBR
+                    IntToFpuOp::I64ToF32 => 0xb3a4, // CEGBRA
+                    IntToFpuOp::U64ToF32 => 0xb3a0, // CELGBR
+                    IntToFpuOp::I32ToF64 => 0xb395, // CDFBRA
+                    IntToFpuOp::U32ToF64 => 0xb391, // CDLFBR
+                    IntToFpuOp::I64ToF64 => 0xb3a5, // CDGBRA
+                    IntToFpuOp::U64ToF64 => 0xb3a1, // CDLGBR
+                };
+                put(sink, &enc_rrf_cde(opcode, rd.to_reg(), rn, 0, 0));
+            }
+            &Inst::FpuRound { op, rd, rn } => {
+                let (opcode, m3) = match op {
+                    FpuRoundMode::Minus32 => (0xb357, 7), // FIEBR
+                    FpuRoundMode::Minus64 => (0xb35f, 7), // FIDBR
+                    FpuRoundMode::Plus32 => (0xb357, 6), // FIEBR
+                    FpuRoundMode::Plus64 => (0xb35f, 6), // FIDBR
+                    FpuRoundMode::Zero32 => (0xb357, 5), // FIEBR
+                    FpuRoundMode::Zero64 => (0xb35f, 5), // FIDBR
+                    FpuRoundMode::Nearest32 => (0xb357, 4), // FIEBR
+                    FpuRoundMode::Nearest64 => (0xb35f, 4), // FIDBR
+                };
+                put(sink, &enc_rrf_cde(opcode, rd.to_reg(), rn, m3, 0));
+            }
+            &Inst::FpuVecRRR { fpu_op, rd, rn, rm } => {
+                let (opcode, m4) = match fpu_op {
+                    FPUOp2::Max32 => (0xe7ef, 2), // VFMAX
+                    FPUOp2::Max64 => (0xe7ef, 3), // VFMAX
+                    FPUOp2::Min32 => (0xe7ee, 2), // VFMIN
+                    FPUOp2::Min64 => (0xe7ee, 3), // VFMIN
+                    _ => unimplemented!(),
+                };
+                put(sink, &enc_vrr(opcode, rd.to_reg(), rn, rm, m4, 8, 1));
+            }
+            &Inst::FpuCmp32 { rn, rm } => {
+                let opcode = 0xb309; // CEBR
+                put(sink, &enc_rre(opcode, rn, rm));
+            }
+            &Inst::FpuCmp64 { rn, rm } => {
+                let opcode = 0xb319; // CDBR
+                put(sink, &enc_rre(opcode, rn, rm));
+            }
+
+            &Inst::Call { link, ref info } => {
+                let opcode = 0xc05; // BRASL
+                let reloc = Reloc::S390xPCRel32Dbl;
+                let srcloc = state.cur_srcloc();
+                if let Some(s) = state.take_stack_map() {
+                    sink.add_stack_map(StackMapExtent::UpcomingBytes(6), s);
+                }
+                put_with_reloc(
+                    sink,
+                    &enc_ril_b(opcode, link.to_reg(), 0),
+                    2,
+                    srcloc,
+                    reloc,
+                    &info.dest,
+                    0,
+                );
+                if info.opcode.is_call() {
+                    sink.add_call_site(srcloc, info.opcode);
+                }
+            }
+            &Inst::CallInd { link, ref info } => {
+                let opcode = 0x0d; // BASR
+                let srcloc = state.cur_srcloc();
+                if let Some(s) = state.take_stack_map() {
+                    sink.add_stack_map(StackMapExtent::UpcomingBytes(2), s);
+                }
+                put(sink, &enc_rr(opcode, link.to_reg(), info.rn));
+                if info.opcode.is_call() {
+                    sink.add_call_site(srcloc, info.opcode);
+                }
+            }
+            &Inst::Ret { link } => {
+                let opcode = 0x07; // BCR
+                put(sink, &enc_rr(opcode, gpr(15), link));
+            }
+            &Inst::EpiloguePlaceholder => {
+                // Noop; this is just a placeholder for epilogues.
+            }
+            &Inst::Jump { ref dest } => {
+                let off = sink.cur_offset();
+                // Indicate that the jump uses a label, if so, so that a fixup can occur later.
+                if let Some(l) = dest.as_label() {
+                    sink.use_label_at_offset(off, l, LabelUse::BranchRIL);
+                    sink.add_uncond_branch(off, off + 6, l);
+                }
+                // Emit the jump itself.
+                let opcode = 0xc04; // BRCL
+                put(sink, &enc_ril_c(opcode, 15, dest.as_ril_offset_or_zero()));
+            }
+            &Inst::IndirectBr { rn, .. } => {
+                let opcode = 0x07; // BCR
+                put(sink, &enc_rr(opcode, gpr(15), rn));
+            }
+            &Inst::CondBr {
+                ref taken,
+                ref not_taken,
+                cond,
+            } => {
+                let opcode = 0xc04; // BRCL
+
+                // Conditional part first.
+                let cond_off = sink.cur_offset();
+                if let Some(l) = taken.as_label() {
+                    sink.use_label_at_offset(cond_off, l, LabelUse::BranchRIL);
+                    let inverted = &enc_ril_c(opcode, cond.invert().bits(), 0);
+                    sink.add_cond_branch(cond_off, cond_off + 6, l, inverted);
+                }
+                put(
+                    sink,
+                    &enc_ril_c(opcode, cond.bits(), taken.as_ril_offset_or_zero()),
+                );
+
+                // Unconditional part next.
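+                // (Registering the inverted encoding above lets the MachBuffer
+                // flip the conditional branch during branch simplification,
+                // and the unconditional-branch record below lets it elide a
+                // jump that lands on the fallthrough block.)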
+                let uncond_off = sink.cur_offset();
+                if let Some(l) = not_taken.as_label() {
+                    sink.use_label_at_offset(uncond_off, l, LabelUse::BranchRIL);
+                    sink.add_uncond_branch(uncond_off, uncond_off + 6, l);
+                }
+                put(
+                    sink,
+                    &enc_ril_c(opcode, 15, not_taken.as_ril_offset_or_zero()),
+                );
+            }
+            &Inst::OneWayCondBr { ref target, cond } => {
+                let opcode = 0xc04; // BRCL
+                if let Some(l) = target.as_label() {
+                    sink.use_label_at_offset(sink.cur_offset(), l, LabelUse::BranchRIL);
+                }
+                put(
+                    sink,
+                    &enc_ril_c(opcode, cond.bits(), target.as_ril_offset_or_zero()),
+                );
+            }
+            &Inst::Nop0 => {}
+            &Inst::Nop2 => {
+                put(sink, &enc_e(0x0707));
+            }
+            &Inst::Debugtrap => {
+                put(sink, &enc_e(0x0001));
+            }
+            &Inst::Trap { trap_code } => {
+                if let Some(s) = state.take_stack_map() {
+                    sink.add_stack_map(StackMapExtent::UpcomingBytes(2), s);
+                }
+                let srcloc = state.cur_srcloc();
+                put_with_trap(sink, &enc_e(0x0000), srcloc, trap_code);
+            }
+            &Inst::TrapIf { cond, trap_code } => {
+                // Branch over trap if condition is false.
+                let opcode = 0xa74; // BRC
+                put(sink, &enc_ri_c(opcode, cond.invert().bits(), 4 + 2));
+                // Now emit the actual trap.
+                if let Some(s) = state.take_stack_map() {
+                    sink.add_stack_map(StackMapExtent::UpcomingBytes(2), s);
+                }
+                let srcloc = state.cur_srcloc();
+                put_with_trap(sink, &enc_e(0x0000), srcloc, trap_code);
+            }
+            &Inst::JTSequence {
+                ridx,
+                rtmp1,
+                rtmp2,
+                ref info,
+                ..
+            } => {
+                let table_label = sink.get_label();
+
+                // This sequence is *one* instruction in the vcode, and is expanded only here at
+                // emission time, because we cannot allow the regalloc to insert spills/reloads in
+                // the middle; we depend on hardcoded PC-rel addressing below.
+
+                // Bounds-check index and branch to default.
+                let inst = Inst::CmpRUImm32 {
+                    op: CmpOp::CmpL64,
+                    rn: ridx,
+                    imm: info.targets.len() as u32,
+                };
+                inst.emit(sink, emit_info, state);
+                let inst = Inst::OneWayCondBr {
+                    target: info.default_target,
+                    cond: Cond::from_intcc(IntCC::UnsignedGreaterThanOrEqual),
+                };
+                inst.emit(sink, emit_info, state);
+
+                // Set rtmp2 to index scaled by entry size.
+                let inst = Inst::ShiftRR {
+                    shift_op: ShiftOp::LShL64,
+                    rd: rtmp2,
+                    rn: ridx,
+                    shift_imm: SImm20::maybe_from_i64(2).unwrap(),
+                    shift_reg: None,
+                };
+                inst.emit(sink, emit_info, state);
+
+                // Set rtmp1 to address of jump table.
+                let inst = Inst::LoadAddr {
+                    rd: rtmp1,
+                    mem: MemArg::Label {
+                        target: BranchTarget::Label(table_label),
+                    },
+                };
+                inst.emit(sink, emit_info, state);
+
+                // Set rtmp2 to value loaded out of jump table.
+                let inst = Inst::Load64SExt32 {
+                    rd: rtmp2,
+                    mem: MemArg::reg_plus_reg(rtmp1.to_reg(), rtmp2.to_reg(), MemFlags::trusted()),
+                };
+                inst.emit(sink, emit_info, state);
+
+                // Set rtmp1 to target address (rtmp1 + rtmp2).
+                let inst = Inst::AluRRR {
+                    alu_op: ALUOp::Add64,
+                    rd: rtmp1,
+                    rn: rtmp1.to_reg(),
+                    rm: rtmp2.to_reg(),
+                };
+                inst.emit(sink, emit_info, state);
+
+                // Branch to computed address. (`targets` here is only used for successor queries
+                // and is not needed for emission.)
+                let inst = Inst::IndirectBr {
+                    rn: rtmp1.to_reg(),
+                    targets: vec![],
+                };
+                inst.emit(sink, emit_info, state);
+
+                // Emit jump table (table of 32-bit offsets).
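+                // (Each entry below is seeded with its own offset from the
+                // start of the table, stored big-endian via `swap_bytes`; the
+                // `LabelUse::PCRel32` fixup is expected to add the pc-relative
+                // distance to the target, leaving `target - table_start` in
+                // the entry, which the add sequence above turns back into an
+                // absolute address.)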
+ sink.bind_label(table_label); + let jt_off = sink.cur_offset(); + for &target in info.targets.iter() { + let word_off = sink.cur_offset(); + let off_into_table = word_off - jt_off; + sink.use_label_at_offset( + word_off, + target.as_label().unwrap(), + LabelUse::PCRel32, + ); + sink.put4(off_into_table.swap_bytes()); + } + + // Lowering produces an EmitIsland before using a JTSequence, so we can safely + // disable the worst-case-size check in this case. + start_off = sink.cur_offset(); + } + + &Inst::VirtualSPOffsetAdj { offset } => { + debug!( + "virtual sp offset adjusted by {} -> {}", + offset, + state.virtual_sp_offset + offset + ); + state.virtual_sp_offset += offset; + } + + &Inst::ValueLabelMarker { .. } => { + // Nothing; this is only used to compute debug info. + } + + &Inst::Unwind { ref inst } => { + sink.add_unwind(inst.clone()); + } + } + + let end_off = sink.cur_offset(); + debug_assert!((end_off - start_off) <= Inst::worst_case_size()); + + state.clear_post_insn(); + } + + fn pretty_print(&self, mb_rru: Option<&RealRegUniverse>, state: &mut EmitState) -> String { + self.print_with_state(mb_rru, state) + } +} diff --git a/cranelift/codegen/src/isa/s390x/inst/emit_tests.rs b/cranelift/codegen/src/isa/s390x/inst/emit_tests.rs new file mode 100644 index 0000000000..746daf97f1 --- /dev/null +++ b/cranelift/codegen/src/isa/s390x/inst/emit_tests.rs @@ -0,0 +1,7140 @@ +use crate::ir::MemFlags; +use crate::isa::s390x::inst::*; +use crate::isa::test_utils; +use crate::settings; +use alloc::vec::Vec; + +#[test] +fn test_s390x_binemit() { + let mut insns = Vec::<(Inst, &str, &str)>::new(); + + insns.push((Inst::Nop0, "", "nop-zero-len")); + insns.push((Inst::Nop2, "0707", "nop")); + + insns.push(( + Inst::AluRRR { + alu_op: ALUOp::Add32, + rd: writable_gpr(1), + rn: gpr(2), + rm: gpr(3), + }, + "B9F83012", + "ark %r1, %r2, %r3", + )); + insns.push(( + Inst::AluRRR { + alu_op: ALUOp::Add64, + rd: writable_gpr(4), + rn: gpr(5), + rm: gpr(6), + }, + "B9E86045", + "agrk %r4, %r5, %r6", + )); + insns.push(( + Inst::AluRRR { + alu_op: ALUOp::Sub32, + rd: writable_gpr(1), + rn: gpr(2), + rm: gpr(3), + }, + "B9F93012", + "srk %r1, %r2, %r3", + )); + insns.push(( + Inst::AluRRR { + alu_op: ALUOp::Sub64, + rd: writable_gpr(4), + rn: gpr(5), + rm: gpr(6), + }, + "B9E96045", + "sgrk %r4, %r5, %r6", + )); + insns.push(( + Inst::AluRRR { + alu_op: ALUOp::Mul32, + rd: writable_gpr(1), + rn: gpr(2), + rm: gpr(3), + }, + "B9FD3012", + "msrkc %r1, %r2, %r3", + )); + insns.push(( + Inst::AluRRR { + alu_op: ALUOp::Mul64, + rd: writable_gpr(4), + rn: gpr(5), + rm: gpr(6), + }, + "B9ED6045", + "msgrkc %r4, %r5, %r6", + )); + insns.push(( + Inst::AluRRR { + alu_op: ALUOp::And32, + rd: writable_gpr(1), + rn: gpr(2), + rm: gpr(3), + }, + "B9F43012", + "nrk %r1, %r2, %r3", + )); + insns.push(( + Inst::AluRRR { + alu_op: ALUOp::And64, + rd: writable_gpr(4), + rn: gpr(5), + rm: gpr(6), + }, + "B9E46045", + "ngrk %r4, %r5, %r6", + )); + insns.push(( + Inst::AluRRR { + alu_op: ALUOp::Orr32, + rd: writable_gpr(1), + rn: gpr(2), + rm: gpr(3), + }, + "B9F63012", + "ork %r1, %r2, %r3", + )); + insns.push(( + Inst::AluRRR { + alu_op: ALUOp::Orr64, + rd: writable_gpr(4), + rn: gpr(5), + rm: gpr(6), + }, + "B9E66045", + "ogrk %r4, %r5, %r6", + )); + insns.push(( + Inst::AluRRR { + alu_op: ALUOp::Xor32, + rd: writable_gpr(1), + rn: gpr(2), + rm: gpr(3), + }, + "B9F73012", + "xrk %r1, %r2, %r3", + )); + insns.push(( + Inst::AluRRR { + alu_op: ALUOp::Xor64, + rd: writable_gpr(4), + rn: gpr(5), + rm: gpr(6), + }, + 
"B9E76045", + "xgrk %r4, %r5, %r6", + )); + insns.push(( + Inst::AluRRR { + alu_op: ALUOp::AndNot32, + rd: writable_gpr(1), + rn: gpr(2), + rm: gpr(3), + }, + "B9743012", + "nnrk %r1, %r2, %r3", + )); + insns.push(( + Inst::AluRRR { + alu_op: ALUOp::AndNot64, + rd: writable_gpr(4), + rn: gpr(5), + rm: gpr(6), + }, + "B9646045", + "nngrk %r4, %r5, %r6", + )); + insns.push(( + Inst::AluRRR { + alu_op: ALUOp::OrrNot32, + rd: writable_gpr(1), + rn: gpr(2), + rm: gpr(3), + }, + "B9763012", + "nork %r1, %r2, %r3", + )); + insns.push(( + Inst::AluRRR { + alu_op: ALUOp::OrrNot64, + rd: writable_gpr(4), + rn: gpr(5), + rm: gpr(6), + }, + "B9666045", + "nogrk %r4, %r5, %r6", + )); + insns.push(( + Inst::AluRRR { + alu_op: ALUOp::XorNot32, + rd: writable_gpr(1), + rn: gpr(2), + rm: gpr(3), + }, + "B9773012", + "nxrk %r1, %r2, %r3", + )); + insns.push(( + Inst::AluRRR { + alu_op: ALUOp::XorNot64, + rd: writable_gpr(4), + rn: gpr(5), + rm: gpr(6), + }, + "B9676045", + "nxgrk %r4, %r5, %r6", + )); + + insns.push(( + Inst::AluRRSImm16 { + alu_op: ALUOp::Add32, + rd: writable_gpr(4), + rn: gpr(5), + imm: -32768, + }, + "EC45800000D8", + "ahik %r4, %r5, -32768", + )); + insns.push(( + Inst::AluRRSImm16 { + alu_op: ALUOp::Add32, + rd: writable_gpr(4), + rn: gpr(5), + imm: 32767, + }, + "EC457FFF00D8", + "ahik %r4, %r5, 32767", + )); + insns.push(( + Inst::AluRRSImm16 { + alu_op: ALUOp::Add64, + rd: writable_gpr(4), + rn: gpr(5), + imm: -32768, + }, + "EC45800000D9", + "aghik %r4, %r5, -32768", + )); + insns.push(( + Inst::AluRRSImm16 { + alu_op: ALUOp::Add64, + rd: writable_gpr(4), + rn: gpr(5), + imm: 32767, + }, + "EC457FFF00D9", + "aghik %r4, %r5, 32767", + )); + + insns.push(( + Inst::AluRR { + alu_op: ALUOp::Add32, + rd: writable_gpr(1), + rm: gpr(2), + }, + "1A12", + "ar %r1, %r2", + )); + insns.push(( + Inst::AluRR { + alu_op: ALUOp::Add64, + rd: writable_gpr(4), + rm: gpr(5), + }, + "B9080045", + "agr %r4, %r5", + )); + insns.push(( + Inst::AluRR { + alu_op: ALUOp::Add64Ext32, + rd: writable_gpr(4), + rm: gpr(5), + }, + "B9180045", + "agfr %r4, %r5", + )); + insns.push(( + Inst::AluRR { + alu_op: ALUOp::Sub32, + rd: writable_gpr(1), + rm: gpr(2), + }, + "1B12", + "sr %r1, %r2", + )); + insns.push(( + Inst::AluRR { + alu_op: ALUOp::Sub64, + rd: writable_gpr(4), + rm: gpr(5), + }, + "B9090045", + "sgr %r4, %r5", + )); + insns.push(( + Inst::AluRR { + alu_op: ALUOp::Sub64Ext32, + rd: writable_gpr(4), + rm: gpr(5), + }, + "B9190045", + "sgfr %r4, %r5", + )); + insns.push(( + Inst::AluRR { + alu_op: ALUOp::Mul32, + rd: writable_gpr(1), + rm: gpr(2), + }, + "B2520012", + "msr %r1, %r2", + )); + insns.push(( + Inst::AluRR { + alu_op: ALUOp::Mul64, + rd: writable_gpr(4), + rm: gpr(5), + }, + "B90C0045", + "msgr %r4, %r5", + )); + insns.push(( + Inst::AluRR { + alu_op: ALUOp::Mul64Ext32, + rd: writable_gpr(4), + rm: gpr(5), + }, + "B91C0045", + "msgfr %r4, %r5", + )); + insns.push(( + Inst::AluRR { + alu_op: ALUOp::And32, + rd: writable_gpr(1), + rm: gpr(2), + }, + "1412", + "nr %r1, %r2", + )); + insns.push(( + Inst::AluRR { + alu_op: ALUOp::And64, + rd: writable_gpr(4), + rm: gpr(5), + }, + "B9800045", + "ngr %r4, %r5", + )); + insns.push(( + Inst::AluRR { + alu_op: ALUOp::Orr32, + rd: writable_gpr(1), + rm: gpr(2), + }, + "1612", + "or %r1, %r2", + )); + insns.push(( + Inst::AluRR { + alu_op: ALUOp::Orr64, + rd: writable_gpr(4), + rm: gpr(5), + }, + "B9810045", + "ogr %r4, %r5", + )); + insns.push(( + Inst::AluRR { + alu_op: ALUOp::Xor32, + rd: writable_gpr(1), + rm: gpr(2), + }, + "1712", + "xr %r1, 
%r2", + )); + insns.push(( + Inst::AluRR { + alu_op: ALUOp::Xor64, + rd: writable_gpr(4), + rm: gpr(5), + }, + "B9820045", + "xgr %r4, %r5", + )); + + insns.push(( + Inst::AluRX { + alu_op: ALUOp::Add32, + rd: writable_gpr(1), + mem: MemArg::BXD12 { + base: gpr(2), + index: zero_reg(), + disp: UImm12::zero(), + flags: MemFlags::trusted(), + }, + }, + "5A102000", + "a %r1, 0(%r2)", + )); + insns.push(( + Inst::AluRX { + alu_op: ALUOp::Add32Ext16, + rd: writable_gpr(1), + mem: MemArg::BXD12 { + base: gpr(2), + index: zero_reg(), + disp: UImm12::zero(), + flags: MemFlags::trusted(), + }, + }, + "4A102000", + "ah %r1, 0(%r2)", + )); + insns.push(( + Inst::AluRX { + alu_op: ALUOp::Add32, + rd: writable_gpr(1), + mem: MemArg::BXD20 { + base: gpr(2), + index: zero_reg(), + disp: SImm20::zero(), + flags: MemFlags::trusted(), + }, + }, + "E3102000005A", + "ay %r1, 0(%r2)", + )); + insns.push(( + Inst::AluRX { + alu_op: ALUOp::Add32Ext16, + rd: writable_gpr(1), + mem: MemArg::BXD20 { + base: gpr(2), + index: zero_reg(), + disp: SImm20::zero(), + flags: MemFlags::trusted(), + }, + }, + "E3102000004A", + "ahy %r1, 0(%r2)", + )); + insns.push(( + Inst::AluRX { + alu_op: ALUOp::Add64, + rd: writable_gpr(1), + mem: MemArg::BXD12 { + base: gpr(2), + index: zero_reg(), + disp: UImm12::zero(), + flags: MemFlags::trusted(), + }, + }, + "E31020000008", + "ag %r1, 0(%r2)", + )); + insns.push(( + Inst::AluRX { + alu_op: ALUOp::Add64Ext16, + rd: writable_gpr(1), + mem: MemArg::BXD12 { + base: gpr(2), + index: zero_reg(), + disp: UImm12::zero(), + flags: MemFlags::trusted(), + }, + }, + "E31020000038", + "agh %r1, 0(%r2)", + )); + insns.push(( + Inst::AluRX { + alu_op: ALUOp::Add64Ext32, + rd: writable_gpr(1), + mem: MemArg::BXD12 { + base: gpr(2), + index: zero_reg(), + disp: UImm12::zero(), + flags: MemFlags::trusted(), + }, + }, + "E31020000018", + "agf %r1, 0(%r2)", + )); + insns.push(( + Inst::AluRX { + alu_op: ALUOp::Sub32, + rd: writable_gpr(1), + mem: MemArg::BXD12 { + base: gpr(2), + index: zero_reg(), + disp: UImm12::zero(), + flags: MemFlags::trusted(), + }, + }, + "5B102000", + "s %r1, 0(%r2)", + )); + insns.push(( + Inst::AluRX { + alu_op: ALUOp::Sub32Ext16, + rd: writable_gpr(1), + mem: MemArg::BXD12 { + base: gpr(2), + index: zero_reg(), + disp: UImm12::zero(), + flags: MemFlags::trusted(), + }, + }, + "4B102000", + "sh %r1, 0(%r2)", + )); + insns.push(( + Inst::AluRX { + alu_op: ALUOp::Sub32, + rd: writable_gpr(1), + mem: MemArg::BXD20 { + base: gpr(2), + index: zero_reg(), + disp: SImm20::zero(), + flags: MemFlags::trusted(), + }, + }, + "E3102000005B", + "sy %r1, 0(%r2)", + )); + insns.push(( + Inst::AluRX { + alu_op: ALUOp::Sub32Ext16, + rd: writable_gpr(1), + mem: MemArg::BXD20 { + base: gpr(2), + index: zero_reg(), + disp: SImm20::zero(), + flags: MemFlags::trusted(), + }, + }, + "E3102000007B", + "shy %r1, 0(%r2)", + )); + insns.push(( + Inst::AluRX { + alu_op: ALUOp::Sub64, + rd: writable_gpr(1), + mem: MemArg::BXD12 { + base: gpr(2), + index: zero_reg(), + disp: UImm12::zero(), + flags: MemFlags::trusted(), + }, + }, + "E31020000009", + "sg %r1, 0(%r2)", + )); + insns.push(( + Inst::AluRX { + alu_op: ALUOp::Sub64Ext16, + rd: writable_gpr(1), + mem: MemArg::BXD12 { + base: gpr(2), + index: zero_reg(), + disp: UImm12::zero(), + flags: MemFlags::trusted(), + }, + }, + "E31020000039", + "sgh %r1, 0(%r2)", + )); + insns.push(( + Inst::AluRX { + alu_op: ALUOp::Sub64Ext32, + rd: writable_gpr(1), + mem: MemArg::BXD12 { + base: gpr(2), + index: zero_reg(), + disp: UImm12::zero(), + flags: 
MemFlags::trusted(), + }, + }, + "E31020000019", + "sgf %r1, 0(%r2)", + )); + insns.push(( + Inst::AluRX { + alu_op: ALUOp::Mul32, + rd: writable_gpr(1), + mem: MemArg::BXD12 { + base: gpr(2), + index: zero_reg(), + disp: UImm12::zero(), + flags: MemFlags::trusted(), + }, + }, + "71102000", + "ms %r1, 0(%r2)", + )); + insns.push(( + Inst::AluRX { + alu_op: ALUOp::Mul32Ext16, + rd: writable_gpr(1), + mem: MemArg::BXD12 { + base: gpr(2), + index: zero_reg(), + disp: UImm12::zero(), + flags: MemFlags::trusted(), + }, + }, + "4C102000", + "mh %r1, 0(%r2)", + )); + insns.push(( + Inst::AluRX { + alu_op: ALUOp::Mul32, + rd: writable_gpr(1), + mem: MemArg::BXD20 { + base: gpr(2), + index: zero_reg(), + disp: SImm20::zero(), + flags: MemFlags::trusted(), + }, + }, + "E31020000051", + "msy %r1, 0(%r2)", + )); + insns.push(( + Inst::AluRX { + alu_op: ALUOp::Mul32Ext16, + rd: writable_gpr(1), + mem: MemArg::BXD20 { + base: gpr(2), + index: zero_reg(), + disp: SImm20::zero(), + flags: MemFlags::trusted(), + }, + }, + "E3102000007C", + "mhy %r1, 0(%r2)", + )); + insns.push(( + Inst::AluRX { + alu_op: ALUOp::Mul64, + rd: writable_gpr(1), + mem: MemArg::BXD12 { + base: gpr(2), + index: zero_reg(), + disp: UImm12::zero(), + flags: MemFlags::trusted(), + }, + }, + "E3102000000C", + "msg %r1, 0(%r2)", + )); + insns.push(( + Inst::AluRX { + alu_op: ALUOp::Mul64Ext16, + rd: writable_gpr(1), + mem: MemArg::BXD12 { + base: gpr(2), + index: zero_reg(), + disp: UImm12::zero(), + flags: MemFlags::trusted(), + }, + }, + "E3102000003C", + "mgh %r1, 0(%r2)", + )); + insns.push(( + Inst::AluRX { + alu_op: ALUOp::Mul64Ext32, + rd: writable_gpr(1), + mem: MemArg::BXD12 { + base: gpr(2), + index: zero_reg(), + disp: UImm12::zero(), + flags: MemFlags::trusted(), + }, + }, + "E3102000001C", + "msgf %r1, 0(%r2)", + )); + insns.push(( + Inst::AluRX { + alu_op: ALUOp::And32, + rd: writable_gpr(1), + mem: MemArg::BXD12 { + base: gpr(2), + index: zero_reg(), + disp: UImm12::zero(), + flags: MemFlags::trusted(), + }, + }, + "54102000", + "n %r1, 0(%r2)", + )); + insns.push(( + Inst::AluRX { + alu_op: ALUOp::And32, + rd: writable_gpr(1), + mem: MemArg::BXD20 { + base: gpr(2), + index: zero_reg(), + disp: SImm20::zero(), + flags: MemFlags::trusted(), + }, + }, + "E31020000054", + "ny %r1, 0(%r2)", + )); + insns.push(( + Inst::AluRX { + alu_op: ALUOp::And64, + rd: writable_gpr(1), + mem: MemArg::BXD12 { + base: gpr(2), + index: zero_reg(), + disp: UImm12::zero(), + flags: MemFlags::trusted(), + }, + }, + "E31020000080", + "ng %r1, 0(%r2)", + )); + insns.push(( + Inst::AluRX { + alu_op: ALUOp::Orr32, + rd: writable_gpr(1), + mem: MemArg::BXD12 { + base: gpr(2), + index: zero_reg(), + disp: UImm12::zero(), + flags: MemFlags::trusted(), + }, + }, + "56102000", + "o %r1, 0(%r2)", + )); + insns.push(( + Inst::AluRX { + alu_op: ALUOp::Orr32, + rd: writable_gpr(1), + mem: MemArg::BXD20 { + base: gpr(2), + index: zero_reg(), + disp: SImm20::zero(), + flags: MemFlags::trusted(), + }, + }, + "E31020000056", + "oy %r1, 0(%r2)", + )); + insns.push(( + Inst::AluRX { + alu_op: ALUOp::Orr64, + rd: writable_gpr(1), + mem: MemArg::BXD12 { + base: gpr(2), + index: zero_reg(), + disp: UImm12::zero(), + flags: MemFlags::trusted(), + }, + }, + "E31020000081", + "og %r1, 0(%r2)", + )); + insns.push(( + Inst::AluRX { + alu_op: ALUOp::Xor32, + rd: writable_gpr(1), + mem: MemArg::BXD12 { + base: gpr(2), + index: zero_reg(), + disp: UImm12::zero(), + flags: MemFlags::trusted(), + }, + }, + "57102000", + "x %r1, 0(%r2)", + )); + insns.push(( + Inst::AluRX { 
+ alu_op: ALUOp::Xor32, + rd: writable_gpr(1), + mem: MemArg::BXD20 { + base: gpr(2), + index: zero_reg(), + disp: SImm20::zero(), + flags: MemFlags::trusted(), + }, + }, + "E31020000057", + "xy %r1, 0(%r2)", + )); + insns.push(( + Inst::AluRX { + alu_op: ALUOp::Xor64, + rd: writable_gpr(1), + mem: MemArg::BXD12 { + base: gpr(2), + index: zero_reg(), + disp: UImm12::zero(), + flags: MemFlags::trusted(), + }, + }, + "E31020000082", + "xg %r1, 0(%r2)", + )); + + insns.push(( + Inst::AluRSImm16 { + alu_op: ALUOp::Add32, + rd: writable_gpr(7), + imm: -32768, + }, + "A77A8000", + "ahi %r7, -32768", + )); + insns.push(( + Inst::AluRSImm16 { + alu_op: ALUOp::Add32, + rd: writable_gpr(7), + imm: 32767, + }, + "A77A7FFF", + "ahi %r7, 32767", + )); + insns.push(( + Inst::AluRSImm16 { + alu_op: ALUOp::Add64, + rd: writable_gpr(7), + imm: -32768, + }, + "A77B8000", + "aghi %r7, -32768", + )); + insns.push(( + Inst::AluRSImm16 { + alu_op: ALUOp::Add64, + rd: writable_gpr(7), + imm: 32767, + }, + "A77B7FFF", + "aghi %r7, 32767", + )); + insns.push(( + Inst::AluRSImm16 { + alu_op: ALUOp::Mul32, + rd: writable_gpr(7), + imm: -32768, + }, + "A77C8000", + "mhi %r7, -32768", + )); + insns.push(( + Inst::AluRSImm16 { + alu_op: ALUOp::Mul32, + rd: writable_gpr(7), + imm: 32767, + }, + "A77C7FFF", + "mhi %r7, 32767", + )); + insns.push(( + Inst::AluRSImm16 { + alu_op: ALUOp::Mul64, + rd: writable_gpr(7), + imm: -32768, + }, + "A77D8000", + "mghi %r7, -32768", + )); + insns.push(( + Inst::AluRSImm16 { + alu_op: ALUOp::Mul64, + rd: writable_gpr(7), + imm: 32767, + }, + "A77D7FFF", + "mghi %r7, 32767", + )); + + insns.push(( + Inst::AluRSImm32 { + alu_op: ALUOp::Add32, + rd: writable_gpr(7), + imm: -2147483648, + }, + "C27980000000", + "afi %r7, -2147483648", + )); + insns.push(( + Inst::AluRSImm32 { + alu_op: ALUOp::Add32, + rd: writable_gpr(7), + imm: 2147483647, + }, + "C2797FFFFFFF", + "afi %r7, 2147483647", + )); + insns.push(( + Inst::AluRSImm32 { + alu_op: ALUOp::Mul32, + rd: writable_gpr(7), + imm: -2147483648, + }, + "C27180000000", + "msfi %r7, -2147483648", + )); + insns.push(( + Inst::AluRSImm32 { + alu_op: ALUOp::Mul32, + rd: writable_gpr(7), + imm: 2147483647, + }, + "C2717FFFFFFF", + "msfi %r7, 2147483647", + )); + insns.push(( + Inst::AluRSImm32 { + alu_op: ALUOp::Add64, + rd: writable_gpr(7), + imm: -2147483648, + }, + "C27880000000", + "agfi %r7, -2147483648", + )); + insns.push(( + Inst::AluRSImm32 { + alu_op: ALUOp::Add64, + rd: writable_gpr(7), + imm: 2147483647, + }, + "C2787FFFFFFF", + "agfi %r7, 2147483647", + )); + insns.push(( + Inst::AluRSImm32 { + alu_op: ALUOp::Mul64, + rd: writable_gpr(7), + imm: -2147483648, + }, + "C27080000000", + "msgfi %r7, -2147483648", + )); + insns.push(( + Inst::AluRSImm32 { + alu_op: ALUOp::Mul64, + rd: writable_gpr(7), + imm: 2147483647, + }, + "C2707FFFFFFF", + "msgfi %r7, 2147483647", + )); + + insns.push(( + Inst::AluRUImm32 { + alu_op: ALUOp::Add32, + rd: writable_gpr(7), + imm: 0, + }, + "C27B00000000", + "alfi %r7, 0", + )); + insns.push(( + Inst::AluRUImm32 { + alu_op: ALUOp::Add32, + rd: writable_gpr(7), + imm: 4294967295, + }, + "C27BFFFFFFFF", + "alfi %r7, 4294967295", + )); + insns.push(( + Inst::AluRUImm32 { + alu_op: ALUOp::Sub32, + rd: writable_gpr(7), + imm: 0, + }, + "C27500000000", + "slfi %r7, 0", + )); + insns.push(( + Inst::AluRUImm32 { + alu_op: ALUOp::Sub32, + rd: writable_gpr(7), + imm: 4294967295, + }, + "C275FFFFFFFF", + "slfi %r7, 4294967295", + )); + insns.push(( + Inst::AluRUImm32 { + alu_op: ALUOp::Add64, + rd: writable_gpr(7), 
+ imm: 0, + }, + "C27A00000000", + "algfi %r7, 0", + )); + insns.push(( + Inst::AluRUImm32 { + alu_op: ALUOp::Add64, + rd: writable_gpr(7), + imm: 4294967295, + }, + "C27AFFFFFFFF", + "algfi %r7, 4294967295", + )); + insns.push(( + Inst::AluRUImm32 { + alu_op: ALUOp::Sub64, + rd: writable_gpr(7), + imm: 0, + }, + "C27400000000", + "slgfi %r7, 0", + )); + insns.push(( + Inst::AluRUImm32 { + alu_op: ALUOp::Sub64, + rd: writable_gpr(7), + imm: 4294967295, + }, + "C274FFFFFFFF", + "slgfi %r7, 4294967295", + )); + + insns.push(( + Inst::AluRUImm16Shifted { + alu_op: ALUOp::And32, + rd: writable_gpr(8), + imm: UImm16Shifted::maybe_from_u64(0x0000_ffff).unwrap(), + }, + "A587FFFF", + "nill %r8, 65535", + )); + insns.push(( + Inst::AluRUImm16Shifted { + alu_op: ALUOp::And32, + rd: writable_gpr(8), + imm: UImm16Shifted::maybe_from_u64(0xffff_0000).unwrap(), + }, + "A586FFFF", + "nilh %r8, 65535", + )); + insns.push(( + Inst::AluRUImm16Shifted { + alu_op: ALUOp::And64, + rd: writable_gpr(8), + imm: UImm16Shifted::maybe_from_u64(0x0000_0000_0000_ffff).unwrap(), + }, + "A587FFFF", + "nill %r8, 65535", + )); + insns.push(( + Inst::AluRUImm16Shifted { + alu_op: ALUOp::And64, + rd: writable_gpr(8), + imm: UImm16Shifted::maybe_from_u64(0x0000_0000_ffff_0000).unwrap(), + }, + "A586FFFF", + "nilh %r8, 65535", + )); + insns.push(( + Inst::AluRUImm16Shifted { + alu_op: ALUOp::And64, + rd: writable_gpr(8), + imm: UImm16Shifted::maybe_from_u64(0x0000_ffff_0000_0000).unwrap(), + }, + "A585FFFF", + "nihl %r8, 65535", + )); + insns.push(( + Inst::AluRUImm16Shifted { + alu_op: ALUOp::And64, + rd: writable_gpr(8), + imm: UImm16Shifted::maybe_from_u64(0xffff_0000_0000_0000).unwrap(), + }, + "A584FFFF", + "nihh %r8, 65535", + )); + insns.push(( + Inst::AluRUImm16Shifted { + alu_op: ALUOp::Orr32, + rd: writable_gpr(8), + imm: UImm16Shifted::maybe_from_u64(0x0000_ffff).unwrap(), + }, + "A58BFFFF", + "oill %r8, 65535", + )); + insns.push(( + Inst::AluRUImm16Shifted { + alu_op: ALUOp::Orr32, + rd: writable_gpr(8), + imm: UImm16Shifted::maybe_from_u64(0xffff_0000).unwrap(), + }, + "A58AFFFF", + "oilh %r8, 65535", + )); + insns.push(( + Inst::AluRUImm16Shifted { + alu_op: ALUOp::Orr64, + rd: writable_gpr(8), + imm: UImm16Shifted::maybe_from_u64(0x0000_0000_0000_ffff).unwrap(), + }, + "A58BFFFF", + "oill %r8, 65535", + )); + insns.push(( + Inst::AluRUImm16Shifted { + alu_op: ALUOp::Orr64, + rd: writable_gpr(8), + imm: UImm16Shifted::maybe_from_u64(0x0000_0000_ffff_0000).unwrap(), + }, + "A58AFFFF", + "oilh %r8, 65535", + )); + insns.push(( + Inst::AluRUImm16Shifted { + alu_op: ALUOp::Orr64, + rd: writable_gpr(8), + imm: UImm16Shifted::maybe_from_u64(0x0000_ffff_0000_0000).unwrap(), + }, + "A589FFFF", + "oihl %r8, 65535", + )); + insns.push(( + Inst::AluRUImm16Shifted { + alu_op: ALUOp::Orr64, + rd: writable_gpr(8), + imm: UImm16Shifted::maybe_from_u64(0xffff_0000_0000_0000).unwrap(), + }, + "A588FFFF", + "oihh %r8, 65535", + )); + + insns.push(( + Inst::AluRUImm32Shifted { + alu_op: ALUOp::And32, + rd: writable_gpr(8), + imm: UImm32Shifted::maybe_from_u64(0xffff_ffff).unwrap(), + }, + "C08BFFFFFFFF", + "nilf %r8, 4294967295", + )); + insns.push(( + Inst::AluRUImm32Shifted { + alu_op: ALUOp::And64, + rd: writable_gpr(8), + imm: UImm32Shifted::maybe_from_u64(0x0000_0000_ffff_ffff).unwrap(), + }, + "C08BFFFFFFFF", + "nilf %r8, 4294967295", + )); + insns.push(( + Inst::AluRUImm32Shifted { + alu_op: ALUOp::And64, + rd: writable_gpr(8), + imm: UImm32Shifted::maybe_from_u64(0xffff_ffff_0000_0000).unwrap(), + }, + "C08AFFFFFFFF", + 
"nihf %r8, 4294967295", + )); + insns.push(( + Inst::AluRUImm32Shifted { + alu_op: ALUOp::Orr32, + rd: writable_gpr(8), + imm: UImm32Shifted::maybe_from_u64(0xffff_ffff).unwrap(), + }, + "C08DFFFFFFFF", + "oilf %r8, 4294967295", + )); + insns.push(( + Inst::AluRUImm32Shifted { + alu_op: ALUOp::Orr64, + rd: writable_gpr(8), + imm: UImm32Shifted::maybe_from_u64(0x0000_0000_ffff_ffff).unwrap(), + }, + "C08DFFFFFFFF", + "oilf %r8, 4294967295", + )); + insns.push(( + Inst::AluRUImm32Shifted { + alu_op: ALUOp::Orr64, + rd: writable_gpr(8), + imm: UImm32Shifted::maybe_from_u64(0xffff_ffff_0000_0000).unwrap(), + }, + "C08CFFFFFFFF", + "oihf %r8, 4294967295", + )); + insns.push(( + Inst::AluRUImm32Shifted { + alu_op: ALUOp::Xor32, + rd: writable_gpr(8), + imm: UImm32Shifted::maybe_from_u64(0xffff_ffff).unwrap(), + }, + "C087FFFFFFFF", + "xilf %r8, 4294967295", + )); + insns.push(( + Inst::AluRUImm32Shifted { + alu_op: ALUOp::Xor64, + rd: writable_gpr(8), + imm: UImm32Shifted::maybe_from_u64(0x0000_0000_ffff_ffff).unwrap(), + }, + "C087FFFFFFFF", + "xilf %r8, 4294967295", + )); + insns.push(( + Inst::AluRUImm32Shifted { + alu_op: ALUOp::Xor64, + rd: writable_gpr(8), + imm: UImm32Shifted::maybe_from_u64(0xffff_ffff_0000_0000).unwrap(), + }, + "C086FFFFFFFF", + "xihf %r8, 4294967295", + )); + + insns.push(( + Inst::UnaryRR { + op: UnaryOp::Abs32, + rd: writable_gpr(1), + rn: gpr(10), + }, + "101A", + "lpr %r1, %r10", + )); + insns.push(( + Inst::UnaryRR { + op: UnaryOp::Abs64, + rd: writable_gpr(1), + rn: gpr(10), + }, + "B900001A", + "lpgr %r1, %r10", + )); + insns.push(( + Inst::UnaryRR { + op: UnaryOp::Abs64Ext32, + rd: writable_gpr(1), + rn: gpr(10), + }, + "B910001A", + "lpgfr %r1, %r10", + )); + insns.push(( + Inst::UnaryRR { + op: UnaryOp::Neg32, + rd: writable_gpr(1), + rn: gpr(10), + }, + "131A", + "lcr %r1, %r10", + )); + insns.push(( + Inst::UnaryRR { + op: UnaryOp::Neg64, + rd: writable_gpr(1), + rn: gpr(10), + }, + "B903001A", + "lcgr %r1, %r10", + )); + insns.push(( + Inst::UnaryRR { + op: UnaryOp::Neg64Ext32, + rd: writable_gpr(1), + rn: gpr(10), + }, + "B913001A", + "lcgfr %r1, %r10", + )); + insns.push(( + Inst::UnaryRR { + op: UnaryOp::PopcntByte, + rd: writable_gpr(1), + rn: gpr(10), + }, + "B9E1001A", + "popcnt %r1, %r10", + )); + insns.push(( + Inst::UnaryRR { + op: UnaryOp::PopcntReg, + rd: writable_gpr(1), + rn: gpr(10), + }, + "B9E1801A", + "popcnt %r1, %r10, 8", + )); + + insns.push(( + Inst::CmpRR { + op: CmpOp::CmpS32, + rn: gpr(5), + rm: gpr(6), + }, + "1956", + "cr %r5, %r6", + )); + insns.push(( + Inst::CmpRR { + op: CmpOp::CmpS64, + rn: gpr(5), + rm: gpr(6), + }, + "B9200056", + "cgr %r5, %r6", + )); + insns.push(( + Inst::CmpRR { + op: CmpOp::CmpS64Ext32, + rn: gpr(5), + rm: gpr(6), + }, + "B9300056", + "cgfr %r5, %r6", + )); + insns.push(( + Inst::CmpRR { + op: CmpOp::CmpL32, + rn: gpr(5), + rm: gpr(6), + }, + "1556", + "clr %r5, %r6", + )); + insns.push(( + Inst::CmpRR { + op: CmpOp::CmpL64, + rn: gpr(5), + rm: gpr(6), + }, + "B9210056", + "clgr %r5, %r6", + )); + insns.push(( + Inst::CmpRR { + op: CmpOp::CmpL64Ext32, + rn: gpr(5), + rm: gpr(6), + }, + "B9310056", + "clgfr %r5, %r6", + )); + + insns.push(( + Inst::CmpRX { + op: CmpOp::CmpS32, + rn: gpr(1), + mem: MemArg::BXD12 { + base: gpr(2), + index: zero_reg(), + disp: UImm12::zero(), + flags: MemFlags::trusted(), + }, + }, + "59102000", + "c %r1, 0(%r2)", + )); + insns.push(( + Inst::CmpRX { + op: CmpOp::CmpS32, + rn: gpr(1), + mem: MemArg::BXD20 { + base: gpr(2), + index: zero_reg(), + disp: SImm20::zero(), + 
flags: MemFlags::trusted(), + }, + }, + "E31020000059", + "cy %r1, 0(%r2)", + )); + insns.push(( + Inst::CmpRX { + op: CmpOp::CmpS32, + rn: gpr(1), + mem: MemArg::Label { + target: BranchTarget::ResolvedOffset(64), + }, + }, + "C61D00000020", + "crl %r1, 64", + )); + insns.push(( + Inst::CmpRX { + op: CmpOp::CmpS32Ext16, + rn: gpr(1), + mem: MemArg::BXD12 { + base: gpr(2), + index: zero_reg(), + disp: UImm12::zero(), + flags: MemFlags::trusted(), + }, + }, + "49102000", + "ch %r1, 0(%r2)", + )); + insns.push(( + Inst::CmpRX { + op: CmpOp::CmpS32Ext16, + rn: gpr(1), + mem: MemArg::BXD20 { + base: gpr(2), + index: zero_reg(), + disp: SImm20::zero(), + flags: MemFlags::trusted(), + }, + }, + "E31020000079", + "chy %r1, 0(%r2)", + )); + insns.push(( + Inst::CmpRX { + op: CmpOp::CmpS32Ext16, + rn: gpr(1), + mem: MemArg::Label { + target: BranchTarget::ResolvedOffset(64), + }, + }, + "C61500000020", + "chrl %r1, 64", + )); + insns.push(( + Inst::CmpRX { + op: CmpOp::CmpS64, + rn: gpr(1), + mem: MemArg::BXD12 { + base: gpr(2), + index: zero_reg(), + disp: UImm12::zero(), + flags: MemFlags::trusted(), + }, + }, + "E31020000020", + "cg %r1, 0(%r2)", + )); + insns.push(( + Inst::CmpRX { + op: CmpOp::CmpS64, + rn: gpr(1), + mem: MemArg::Label { + target: BranchTarget::ResolvedOffset(64), + }, + }, + "C61800000020", + "cgrl %r1, 64", + )); + insns.push(( + Inst::CmpRX { + op: CmpOp::CmpS64Ext16, + rn: gpr(1), + mem: MemArg::BXD12 { + base: gpr(2), + index: zero_reg(), + disp: UImm12::zero(), + flags: MemFlags::trusted(), + }, + }, + "E31020000034", + "cgh %r1, 0(%r2)", + )); + insns.push(( + Inst::CmpRX { + op: CmpOp::CmpS64Ext16, + rn: gpr(1), + mem: MemArg::Label { + target: BranchTarget::ResolvedOffset(64), + }, + }, + "C61400000020", + "cghrl %r1, 64", + )); + insns.push(( + Inst::CmpRX { + op: CmpOp::CmpS64Ext32, + rn: gpr(1), + mem: MemArg::BXD12 { + base: gpr(2), + index: zero_reg(), + disp: UImm12::zero(), + flags: MemFlags::trusted(), + }, + }, + "E31020000030", + "cgf %r1, 0(%r2)", + )); + insns.push(( + Inst::CmpRX { + op: CmpOp::CmpS64Ext32, + rn: gpr(1), + mem: MemArg::Label { + target: BranchTarget::ResolvedOffset(64), + }, + }, + "C61C00000020", + "cgfrl %r1, 64", + )); + insns.push(( + Inst::CmpRX { + op: CmpOp::CmpL32, + rn: gpr(1), + mem: MemArg::BXD12 { + base: gpr(2), + index: zero_reg(), + disp: UImm12::zero(), + flags: MemFlags::trusted(), + }, + }, + "55102000", + "cl %r1, 0(%r2)", + )); + insns.push(( + Inst::CmpRX { + op: CmpOp::CmpL32, + rn: gpr(1), + mem: MemArg::BXD20 { + base: gpr(2), + index: zero_reg(), + disp: SImm20::zero(), + flags: MemFlags::trusted(), + }, + }, + "E31020000055", + "cly %r1, 0(%r2)", + )); + insns.push(( + Inst::CmpRX { + op: CmpOp::CmpL32, + rn: gpr(1), + mem: MemArg::Label { + target: BranchTarget::ResolvedOffset(64), + }, + }, + "C61F00000020", + "clrl %r1, 64", + )); + insns.push(( + Inst::CmpRX { + op: CmpOp::CmpL32Ext16, + rn: gpr(1), + mem: MemArg::Label { + target: BranchTarget::ResolvedOffset(64), + }, + }, + "C61700000020", + "clhrl %r1, 64", + )); + insns.push(( + Inst::CmpRX { + op: CmpOp::CmpL64, + rn: gpr(1), + mem: MemArg::BXD12 { + base: gpr(2), + index: zero_reg(), + disp: UImm12::zero(), + flags: MemFlags::trusted(), + }, + }, + "E31020000021", + "clg %r1, 0(%r2)", + )); + insns.push(( + Inst::CmpRX { + op: CmpOp::CmpL64, + rn: gpr(1), + mem: MemArg::Label { + target: BranchTarget::ResolvedOffset(64), + }, + }, + "C61A00000020", + "clgrl %r1, 64", + )); + insns.push(( + Inst::CmpRX { + op: CmpOp::CmpL64Ext16, + rn: gpr(1), + mem: 
MemArg::Label { + target: BranchTarget::ResolvedOffset(64), + }, + }, + "C61600000020", + "clghrl %r1, 64", + )); + insns.push(( + Inst::CmpRX { + op: CmpOp::CmpL64Ext32, + rn: gpr(1), + mem: MemArg::BXD12 { + base: gpr(2), + index: zero_reg(), + disp: UImm12::zero(), + flags: MemFlags::trusted(), + }, + }, + "E31020000031", + "clgf %r1, 0(%r2)", + )); + insns.push(( + Inst::CmpRX { + op: CmpOp::CmpL64Ext32, + rn: gpr(1), + mem: MemArg::Label { + target: BranchTarget::ResolvedOffset(64), + }, + }, + "C61E00000020", + "clgfrl %r1, 64", + )); + + insns.push(( + Inst::CmpRSImm16 { + op: CmpOp::CmpS32, + rn: gpr(7), + imm: -32768, + }, + "A77E8000", + "chi %r7, -32768", + )); + insns.push(( + Inst::CmpRSImm16 { + op: CmpOp::CmpS32, + rn: gpr(7), + imm: 32767, + }, + "A77E7FFF", + "chi %r7, 32767", + )); + insns.push(( + Inst::CmpRSImm16 { + op: CmpOp::CmpS64, + rn: gpr(7), + imm: -32768, + }, + "A77F8000", + "cghi %r7, -32768", + )); + insns.push(( + Inst::CmpRSImm16 { + op: CmpOp::CmpS64, + rn: gpr(7), + imm: 32767, + }, + "A77F7FFF", + "cghi %r7, 32767", + )); + insns.push(( + Inst::CmpRSImm32 { + op: CmpOp::CmpS32, + rn: gpr(7), + imm: -2147483648, + }, + "C27D80000000", + "cfi %r7, -2147483648", + )); + insns.push(( + Inst::CmpRSImm32 { + op: CmpOp::CmpS32, + rn: gpr(7), + imm: 2147483647, + }, + "C27D7FFFFFFF", + "cfi %r7, 2147483647", + )); + insns.push(( + Inst::CmpRSImm32 { + op: CmpOp::CmpS64, + rn: gpr(7), + imm: -2147483648, + }, + "C27C80000000", + "cgfi %r7, -2147483648", + )); + insns.push(( + Inst::CmpRSImm32 { + op: CmpOp::CmpS64, + rn: gpr(7), + imm: 2147483647, + }, + "C27C7FFFFFFF", + "cgfi %r7, 2147483647", + )); + insns.push(( + Inst::CmpRUImm32 { + op: CmpOp::CmpL32, + rn: gpr(7), + imm: 0, + }, + "C27F00000000", + "clfi %r7, 0", + )); + insns.push(( + Inst::CmpRUImm32 { + op: CmpOp::CmpL32, + rn: gpr(7), + imm: 4294967295, + }, + "C27FFFFFFFFF", + "clfi %r7, 4294967295", + )); + insns.push(( + Inst::CmpRUImm32 { + op: CmpOp::CmpL64, + rn: gpr(7), + imm: 0, + }, + "C27E00000000", + "clgfi %r7, 0", + )); + insns.push(( + Inst::CmpRUImm32 { + op: CmpOp::CmpL64, + rn: gpr(7), + imm: 4294967295, + }, + "C27EFFFFFFFF", + "clgfi %r7, 4294967295", + )); + + insns.push(( + Inst::CmpTrapRR { + op: CmpOp::CmpS32, + rn: gpr(5), + rm: gpr(6), + cond: Cond::from_mask(8), + trap_code: TrapCode::StackOverflow, + }, + "B9728056", + "crte %r5, %r6", + )); + insns.push(( + Inst::CmpTrapRR { + op: CmpOp::CmpS64, + rn: gpr(5), + rm: gpr(6), + cond: Cond::from_mask(8), + trap_code: TrapCode::StackOverflow, + }, + "B9608056", + "cgrte %r5, %r6", + )); + insns.push(( + Inst::CmpTrapRR { + op: CmpOp::CmpL32, + rn: gpr(5), + rm: gpr(6), + cond: Cond::from_mask(8), + trap_code: TrapCode::StackOverflow, + }, + "B9738056", + "clrte %r5, %r6", + )); + insns.push(( + Inst::CmpTrapRR { + op: CmpOp::CmpL64, + rn: gpr(5), + rm: gpr(6), + cond: Cond::from_mask(8), + trap_code: TrapCode::StackOverflow, + }, + "B9618056", + "clgrte %r5, %r6", + )); + insns.push(( + Inst::CmpTrapRSImm16 { + op: CmpOp::CmpS32, + rn: gpr(7), + imm: -32768, + cond: Cond::from_mask(8), + trap_code: TrapCode::StackOverflow, + }, + "EC7080008072", + "cite %r7, -32768", + )); + insns.push(( + Inst::CmpTrapRSImm16 { + op: CmpOp::CmpS32, + rn: gpr(7), + imm: 32767, + cond: Cond::from_mask(8), + trap_code: TrapCode::StackOverflow, + }, + "EC707FFF8072", + "cite %r7, 32767", + )); + insns.push(( + Inst::CmpTrapRSImm16 { + op: CmpOp::CmpS64, + rn: gpr(7), + imm: -32768, + cond: Cond::from_mask(8), + trap_code: TrapCode::StackOverflow, + 
}, + "EC7080008070", + "cgite %r7, -32768", + )); + insns.push(( + Inst::CmpTrapRSImm16 { + op: CmpOp::CmpS64, + rn: gpr(7), + imm: 32767, + cond: Cond::from_mask(8), + trap_code: TrapCode::StackOverflow, + }, + "EC707FFF8070", + "cgite %r7, 32767", + )); + insns.push(( + Inst::CmpTrapRUImm16 { + op: CmpOp::CmpL32, + rn: gpr(7), + imm: 0, + cond: Cond::from_mask(8), + trap_code: TrapCode::StackOverflow, + }, + "EC7000008073", + "clfite %r7, 0", + )); + insns.push(( + Inst::CmpTrapRUImm16 { + op: CmpOp::CmpL32, + rn: gpr(7), + imm: 65535, + cond: Cond::from_mask(8), + trap_code: TrapCode::StackOverflow, + }, + "EC70FFFF8073", + "clfite %r7, 65535", + )); + insns.push(( + Inst::CmpTrapRUImm16 { + op: CmpOp::CmpL64, + rn: gpr(7), + imm: 0, + cond: Cond::from_mask(8), + trap_code: TrapCode::StackOverflow, + }, + "EC7000008071", + "clgite %r7, 0", + )); + insns.push(( + Inst::CmpTrapRUImm16 { + op: CmpOp::CmpL64, + rn: gpr(7), + imm: 65535, + cond: Cond::from_mask(8), + trap_code: TrapCode::StackOverflow, + }, + "EC70FFFF8071", + "clgite %r7, 65535", + )); + + insns.push(( + Inst::SMulWide { + rn: gpr(5), + rm: gpr(6), + }, + "B9EC6005", + "mgrk %r0, %r5, %r6", + )); + insns.push((Inst::UMulWide { rn: gpr(5) }, "B9860005", "mlgr %r0, %r5")); + insns.push((Inst::SDivMod32 { rn: gpr(5) }, "B91D0005", "dsgfr %r0, %r5")); + insns.push((Inst::SDivMod64 { rn: gpr(5) }, "B90D0005", "dsgr %r0, %r5")); + insns.push((Inst::UDivMod32 { rn: gpr(5) }, "B9970005", "dlr %r0, %r5")); + insns.push((Inst::UDivMod64 { rn: gpr(5) }, "B9870005", "dlgr %r0, %r5")); + + insns.push((Inst::Flogr { rn: gpr(5) }, "B9830005", "flogr %r0, %r5")); + + insns.push(( + Inst::ShiftRR { + shift_op: ShiftOp::RotL32, + rd: writable_gpr(4), + rn: gpr(5), + shift_imm: SImm20::maybe_from_i64(-524288).unwrap(), + shift_reg: None, + }, + "EB450000801D", + "rll %r4, %r5, -524288", + )); + insns.push(( + Inst::ShiftRR { + shift_op: ShiftOp::RotL32, + rd: writable_gpr(4), + rn: gpr(5), + shift_imm: SImm20::maybe_from_i64(524287).unwrap(), + shift_reg: None, + }, + "EB450FFF7F1D", + "rll %r4, %r5, 524287", + )); + insns.push(( + Inst::ShiftRR { + shift_op: ShiftOp::RotL32, + rd: writable_gpr(4), + rn: gpr(5), + shift_imm: SImm20::maybe_from_i64(-524288).unwrap(), + shift_reg: Some(gpr(6)), + }, + "EB456000801D", + "rll %r4, %r5, -524288(%r6)", + )); + insns.push(( + Inst::ShiftRR { + shift_op: ShiftOp::RotL32, + rd: writable_gpr(4), + rn: gpr(5), + shift_imm: SImm20::maybe_from_i64(524287).unwrap(), + shift_reg: Some(gpr(6)), + }, + "EB456FFF7F1D", + "rll %r4, %r5, 524287(%r6)", + )); + insns.push(( + Inst::ShiftRR { + shift_op: ShiftOp::RotL64, + rd: writable_gpr(4), + rn: gpr(5), + shift_imm: SImm20::maybe_from_i64(-524288).unwrap(), + shift_reg: None, + }, + "EB450000801C", + "rllg %r4, %r5, -524288", + )); + insns.push(( + Inst::ShiftRR { + shift_op: ShiftOp::RotL64, + rd: writable_gpr(4), + rn: gpr(5), + shift_imm: SImm20::maybe_from_i64(524287).unwrap(), + shift_reg: None, + }, + "EB450FFF7F1C", + "rllg %r4, %r5, 524287", + )); + insns.push(( + Inst::ShiftRR { + shift_op: ShiftOp::RotL64, + rd: writable_gpr(4), + rn: gpr(5), + shift_imm: SImm20::maybe_from_i64(-524288).unwrap(), + shift_reg: Some(gpr(6)), + }, + "EB456000801C", + "rllg %r4, %r5, -524288(%r6)", + )); + insns.push(( + Inst::ShiftRR { + shift_op: ShiftOp::RotL64, + rd: writable_gpr(4), + rn: gpr(5), + shift_imm: SImm20::maybe_from_i64(524287).unwrap(), + shift_reg: Some(gpr(6)), + }, + "EB456FFF7F1C", + "rllg %r4, %r5, 524287(%r6)", + )); + insns.push(( + Inst::ShiftRR 
{ + shift_op: ShiftOp::LShL32, + rd: writable_gpr(4), + rn: gpr(5), + shift_imm: SImm20::maybe_from_i64(-524288).unwrap(), + shift_reg: None, + }, + "EB45000080DF", + "sllk %r4, %r5, -524288", + )); + insns.push(( + Inst::ShiftRR { + shift_op: ShiftOp::LShL32, + rd: writable_gpr(4), + rn: gpr(5), + shift_imm: SImm20::maybe_from_i64(524287).unwrap(), + shift_reg: None, + }, + "EB450FFF7FDF", + "sllk %r4, %r5, 524287", + )); + insns.push(( + Inst::ShiftRR { + shift_op: ShiftOp::LShL32, + rd: writable_gpr(4), + rn: gpr(5), + shift_imm: SImm20::maybe_from_i64(-524288).unwrap(), + shift_reg: Some(gpr(6)), + }, + "EB45600080DF", + "sllk %r4, %r5, -524288(%r6)", + )); + insns.push(( + Inst::ShiftRR { + shift_op: ShiftOp::LShL32, + rd: writable_gpr(4), + rn: gpr(5), + shift_imm: SImm20::maybe_from_i64(524287).unwrap(), + shift_reg: Some(gpr(6)), + }, + "EB456FFF7FDF", + "sllk %r4, %r5, 524287(%r6)", + )); + insns.push(( + Inst::ShiftRR { + shift_op: ShiftOp::LShL64, + rd: writable_gpr(4), + rn: gpr(5), + shift_imm: SImm20::maybe_from_i64(-524288).unwrap(), + shift_reg: None, + }, + "EB450000800D", + "sllg %r4, %r5, -524288", + )); + insns.push(( + Inst::ShiftRR { + shift_op: ShiftOp::LShL64, + rd: writable_gpr(4), + rn: gpr(5), + shift_imm: SImm20::maybe_from_i64(524287).unwrap(), + shift_reg: None, + }, + "EB450FFF7F0D", + "sllg %r4, %r5, 524287", + )); + insns.push(( + Inst::ShiftRR { + shift_op: ShiftOp::LShL64, + rd: writable_gpr(4), + rn: gpr(5), + shift_imm: SImm20::maybe_from_i64(-524288).unwrap(), + shift_reg: Some(gpr(6)), + }, + "EB456000800D", + "sllg %r4, %r5, -524288(%r6)", + )); + insns.push(( + Inst::ShiftRR { + shift_op: ShiftOp::LShL64, + rd: writable_gpr(4), + rn: gpr(5), + shift_imm: SImm20::maybe_from_i64(524287).unwrap(), + shift_reg: Some(gpr(6)), + }, + "EB456FFF7F0D", + "sllg %r4, %r5, 524287(%r6)", + )); + insns.push(( + Inst::ShiftRR { + shift_op: ShiftOp::LShR32, + rd: writable_gpr(4), + rn: gpr(5), + shift_imm: SImm20::maybe_from_i64(-524288).unwrap(), + shift_reg: None, + }, + "EB45000080DE", + "srlk %r4, %r5, -524288", + )); + insns.push(( + Inst::ShiftRR { + shift_op: ShiftOp::LShR32, + rd: writable_gpr(4), + rn: gpr(5), + shift_imm: SImm20::maybe_from_i64(524287).unwrap(), + shift_reg: None, + }, + "EB450FFF7FDE", + "srlk %r4, %r5, 524287", + )); + insns.push(( + Inst::ShiftRR { + shift_op: ShiftOp::LShR32, + rd: writable_gpr(4), + rn: gpr(5), + shift_imm: SImm20::maybe_from_i64(-524288).unwrap(), + shift_reg: Some(gpr(6)), + }, + "EB45600080DE", + "srlk %r4, %r5, -524288(%r6)", + )); + insns.push(( + Inst::ShiftRR { + shift_op: ShiftOp::LShR32, + rd: writable_gpr(4), + rn: gpr(5), + shift_imm: SImm20::maybe_from_i64(524287).unwrap(), + shift_reg: Some(gpr(6)), + }, + "EB456FFF7FDE", + "srlk %r4, %r5, 524287(%r6)", + )); + insns.push(( + Inst::ShiftRR { + shift_op: ShiftOp::LShR64, + rd: writable_gpr(4), + rn: gpr(5), + shift_imm: SImm20::maybe_from_i64(-524288).unwrap(), + shift_reg: None, + }, + "EB450000800C", + "srlg %r4, %r5, -524288", + )); + insns.push(( + Inst::ShiftRR { + shift_op: ShiftOp::LShR64, + rd: writable_gpr(4), + rn: gpr(5), + shift_imm: SImm20::maybe_from_i64(524287).unwrap(), + shift_reg: None, + }, + "EB450FFF7F0C", + "srlg %r4, %r5, 524287", + )); + insns.push(( + Inst::ShiftRR { + shift_op: ShiftOp::LShR64, + rd: writable_gpr(4), + rn: gpr(5), + shift_imm: SImm20::maybe_from_i64(-524288).unwrap(), + shift_reg: Some(gpr(6)), + }, + "EB456000800C", + "srlg %r4, %r5, -524288(%r6)", + )); + insns.push(( + Inst::ShiftRR { + shift_op: 
ShiftOp::LShR64, + rd: writable_gpr(4), + rn: gpr(5), + shift_imm: SImm20::maybe_from_i64(524287).unwrap(), + shift_reg: Some(gpr(6)), + }, + "EB456FFF7F0C", + "srlg %r4, %r5, 524287(%r6)", + )); + insns.push(( + Inst::ShiftRR { + shift_op: ShiftOp::AShR32, + rd: writable_gpr(4), + rn: gpr(5), + shift_imm: SImm20::maybe_from_i64(-524288).unwrap(), + shift_reg: None, + }, + "EB45000080DC", + "srak %r4, %r5, -524288", + )); + insns.push(( + Inst::ShiftRR { + shift_op: ShiftOp::AShR32, + rd: writable_gpr(4), + rn: gpr(5), + shift_imm: SImm20::maybe_from_i64(524287).unwrap(), + shift_reg: None, + }, + "EB450FFF7FDC", + "srak %r4, %r5, 524287", + )); + insns.push(( + Inst::ShiftRR { + shift_op: ShiftOp::AShR32, + rd: writable_gpr(4), + rn: gpr(5), + shift_imm: SImm20::maybe_from_i64(-524288).unwrap(), + shift_reg: Some(gpr(6)), + }, + "EB45600080DC", + "srak %r4, %r5, -524288(%r6)", + )); + insns.push(( + Inst::ShiftRR { + shift_op: ShiftOp::AShR32, + rd: writable_gpr(4), + rn: gpr(5), + shift_imm: SImm20::maybe_from_i64(524287).unwrap(), + shift_reg: Some(gpr(6)), + }, + "EB456FFF7FDC", + "srak %r4, %r5, 524287(%r6)", + )); + insns.push(( + Inst::ShiftRR { + shift_op: ShiftOp::AShR64, + rd: writable_gpr(4), + rn: gpr(5), + shift_imm: SImm20::maybe_from_i64(-524288).unwrap(), + shift_reg: None, + }, + "EB450000800A", + "srag %r4, %r5, -524288", + )); + insns.push(( + Inst::ShiftRR { + shift_op: ShiftOp::AShR64, + rd: writable_gpr(4), + rn: gpr(5), + shift_imm: SImm20::maybe_from_i64(524287).unwrap(), + shift_reg: None, + }, + "EB450FFF7F0A", + "srag %r4, %r5, 524287", + )); + insns.push(( + Inst::ShiftRR { + shift_op: ShiftOp::AShR64, + rd: writable_gpr(4), + rn: gpr(5), + shift_imm: SImm20::maybe_from_i64(-524288).unwrap(), + shift_reg: Some(gpr(6)), + }, + "EB456000800A", + "srag %r4, %r5, -524288(%r6)", + )); + insns.push(( + Inst::ShiftRR { + shift_op: ShiftOp::AShR64, + rd: writable_gpr(4), + rn: gpr(5), + shift_imm: SImm20::maybe_from_i64(524287).unwrap(), + shift_reg: Some(gpr(6)), + }, + "EB456FFF7F0A", + "srag %r4, %r5, 524287(%r6)", + )); + + insns.push(( + Inst::Load32 { + rd: writable_gpr(1), + mem: MemArg::BXD12 { + base: gpr(2), + index: zero_reg(), + disp: UImm12::zero(), + flags: MemFlags::trusted(), + }, + }, + "58102000", + "l %r1, 0(%r2)", + )); + insns.push(( + Inst::Load32 { + rd: writable_gpr(1), + mem: MemArg::BXD12 { + base: gpr(2), + index: zero_reg(), + disp: UImm12::maybe_from_u64(4095).unwrap(), + flags: MemFlags::trusted(), + }, + }, + "58102FFF", + "l %r1, 4095(%r2)", + )); + insns.push(( + Inst::Load32 { + rd: writable_gpr(1), + mem: MemArg::BXD20 { + base: gpr(2), + index: zero_reg(), + disp: SImm20::maybe_from_i64(-524288).unwrap(), + flags: MemFlags::trusted(), + }, + }, + "E31020008058", + "ly %r1, -524288(%r2)", + )); + insns.push(( + Inst::Load32 { + rd: writable_gpr(1), + mem: MemArg::BXD20 { + base: gpr(2), + index: zero_reg(), + disp: SImm20::maybe_from_i64(524287).unwrap(), + flags: MemFlags::trusted(), + }, + }, + "E3102FFF7F58", + "ly %r1, 524287(%r2)", + )); + insns.push(( + Inst::Load32 { + rd: writable_gpr(1), + mem: MemArg::BXD12 { + base: gpr(3), + index: gpr(2), + disp: UImm12::zero(), + flags: MemFlags::trusted(), + }, + }, + "58123000", + "l %r1, 0(%r2,%r3)", + )); + insns.push(( + Inst::Load32 { + rd: writable_gpr(1), + mem: MemArg::BXD12 { + base: gpr(3), + index: gpr(2), + disp: UImm12::maybe_from_u64(4095).unwrap(), + flags: MemFlags::trusted(), + }, + }, + "58123FFF", + "l %r1, 4095(%r2,%r3)", + )); + insns.push(( + Inst::Load32 { + rd: 
writable_gpr(1), + mem: MemArg::BXD20 { + base: gpr(3), + index: gpr(2), + disp: SImm20::maybe_from_i64(-524288).unwrap(), + flags: MemFlags::trusted(), + }, + }, + "E31230008058", + "ly %r1, -524288(%r2,%r3)", + )); + insns.push(( + Inst::Load32 { + rd: writable_gpr(1), + mem: MemArg::BXD20 { + base: gpr(3), + index: gpr(2), + disp: SImm20::maybe_from_i64(524287).unwrap(), + flags: MemFlags::trusted(), + }, + }, + "E3123FFF7F58", + "ly %r1, 524287(%r2,%r3)", + )); + insns.push(( + Inst::Load32ZExt8 { + rd: writable_gpr(1), + mem: MemArg::BXD12 { + base: gpr(2), + index: zero_reg(), + disp: UImm12::zero(), + flags: MemFlags::trusted(), + }, + }, + "E31020000094", + "llc %r1, 0(%r2)", + )); + insns.push(( + Inst::Load32ZExt8 { + rd: writable_gpr(1), + mem: MemArg::BXD12 { + base: gpr(2), + index: zero_reg(), + disp: UImm12::maybe_from_u64(4095).unwrap(), + flags: MemFlags::trusted(), + }, + }, + "E3102FFF0094", + "llc %r1, 4095(%r2)", + )); + insns.push(( + Inst::Load32ZExt8 { + rd: writable_gpr(1), + mem: MemArg::BXD20 { + base: gpr(2), + index: zero_reg(), + disp: SImm20::maybe_from_i64(-524288).unwrap(), + flags: MemFlags::trusted(), + }, + }, + "E31020008094", + "llc %r1, -524288(%r2)", + )); + insns.push(( + Inst::Load32ZExt8 { + rd: writable_gpr(1), + mem: MemArg::BXD20 { + base: gpr(2), + index: zero_reg(), + disp: SImm20::maybe_from_i64(524287).unwrap(), + flags: MemFlags::trusted(), + }, + }, + "E3102FFF7F94", + "llc %r1, 524287(%r2)", + )); + insns.push(( + Inst::Load32ZExt8 { + rd: writable_gpr(1), + mem: MemArg::BXD12 { + base: gpr(3), + index: gpr(2), + disp: UImm12::zero(), + flags: MemFlags::trusted(), + }, + }, + "E31230000094", + "llc %r1, 0(%r2,%r3)", + )); + insns.push(( + Inst::Load32ZExt8 { + rd: writable_gpr(1), + mem: MemArg::BXD12 { + base: gpr(3), + index: gpr(2), + disp: UImm12::maybe_from_u64(4095).unwrap(), + flags: MemFlags::trusted(), + }, + }, + "E3123FFF0094", + "llc %r1, 4095(%r2,%r3)", + )); + insns.push(( + Inst::Load32ZExt8 { + rd: writable_gpr(1), + mem: MemArg::BXD20 { + base: gpr(3), + index: gpr(2), + disp: SImm20::maybe_from_i64(-524288).unwrap(), + flags: MemFlags::trusted(), + }, + }, + "E31230008094", + "llc %r1, -524288(%r2,%r3)", + )); + insns.push(( + Inst::Load32ZExt8 { + rd: writable_gpr(1), + mem: MemArg::BXD20 { + base: gpr(3), + index: gpr(2), + disp: SImm20::maybe_from_i64(524287).unwrap(), + flags: MemFlags::trusted(), + }, + }, + "E3123FFF7F94", + "llc %r1, 524287(%r2,%r3)", + )); + insns.push(( + Inst::Load32SExt8 { + rd: writable_gpr(1), + mem: MemArg::BXD12 { + base: gpr(2), + index: zero_reg(), + disp: UImm12::zero(), + flags: MemFlags::trusted(), + }, + }, + "E31020000076", + "lb %r1, 0(%r2)", + )); + insns.push(( + Inst::Load32SExt8 { + rd: writable_gpr(1), + mem: MemArg::BXD12 { + base: gpr(2), + index: zero_reg(), + disp: UImm12::maybe_from_u64(4095).unwrap(), + flags: MemFlags::trusted(), + }, + }, + "E3102FFF0076", + "lb %r1, 4095(%r2)", + )); + insns.push(( + Inst::Load32SExt8 { + rd: writable_gpr(1), + mem: MemArg::BXD20 { + base: gpr(2), + index: zero_reg(), + disp: SImm20::maybe_from_i64(-524288).unwrap(), + flags: MemFlags::trusted(), + }, + }, + "E31020008076", + "lb %r1, -524288(%r2)", + )); + insns.push(( + Inst::Load32SExt8 { + rd: writable_gpr(1), + mem: MemArg::BXD20 { + base: gpr(2), + index: zero_reg(), + disp: SImm20::maybe_from_i64(524287).unwrap(), + flags: MemFlags::trusted(), + }, + }, + "E3102FFF7F76", + "lb %r1, 524287(%r2)", + )); + insns.push(( + Inst::Load32SExt8 { + rd: writable_gpr(1), + mem: 
MemArg::BXD12 { + base: gpr(3), + index: gpr(2), + disp: UImm12::zero(), + flags: MemFlags::trusted(), + }, + }, + "E31230000076", + "lb %r1, 0(%r2,%r3)", + )); + insns.push(( + Inst::Load32SExt8 { + rd: writable_gpr(1), + mem: MemArg::BXD12 { + base: gpr(3), + index: gpr(2), + disp: UImm12::maybe_from_u64(4095).unwrap(), + flags: MemFlags::trusted(), + }, + }, + "E3123FFF0076", + "lb %r1, 4095(%r2,%r3)", + )); + insns.push(( + Inst::Load32SExt8 { + rd: writable_gpr(1), + mem: MemArg::BXD20 { + base: gpr(3), + index: gpr(2), + disp: SImm20::maybe_from_i64(-524288).unwrap(), + flags: MemFlags::trusted(), + }, + }, + "E31230008076", + "lb %r1, -524288(%r2,%r3)", + )); + insns.push(( + Inst::Load32SExt8 { + rd: writable_gpr(1), + mem: MemArg::BXD20 { + base: gpr(3), + index: gpr(2), + disp: SImm20::maybe_from_i64(524287).unwrap(), + flags: MemFlags::trusted(), + }, + }, + "E3123FFF7F76", + "lb %r1, 524287(%r2,%r3)", + )); + insns.push(( + Inst::Load32ZExt16 { + rd: writable_gpr(1), + mem: MemArg::BXD12 { + base: gpr(2), + index: zero_reg(), + disp: UImm12::zero(), + flags: MemFlags::trusted(), + }, + }, + "E31020000095", + "llh %r1, 0(%r2)", + )); + insns.push(( + Inst::Load32ZExt16 { + rd: writable_gpr(1), + mem: MemArg::BXD12 { + base: gpr(2), + index: zero_reg(), + disp: UImm12::maybe_from_u64(4095).unwrap(), + flags: MemFlags::trusted(), + }, + }, + "E3102FFF0095", + "llh %r1, 4095(%r2)", + )); + insns.push(( + Inst::Load32ZExt16 { + rd: writable_gpr(1), + mem: MemArg::BXD20 { + base: gpr(2), + index: zero_reg(), + disp: SImm20::maybe_from_i64(-524288).unwrap(), + flags: MemFlags::trusted(), + }, + }, + "E31020008095", + "llh %r1, -524288(%r2)", + )); + insns.push(( + Inst::Load32ZExt16 { + rd: writable_gpr(1), + mem: MemArg::BXD20 { + base: gpr(2), + index: zero_reg(), + disp: SImm20::maybe_from_i64(524287).unwrap(), + flags: MemFlags::trusted(), + }, + }, + "E3102FFF7F95", + "llh %r1, 524287(%r2)", + )); + insns.push(( + Inst::Load32ZExt16 { + rd: writable_gpr(1), + mem: MemArg::BXD12 { + base: gpr(3), + index: gpr(2), + disp: UImm12::zero(), + flags: MemFlags::trusted(), + }, + }, + "E31230000095", + "llh %r1, 0(%r2,%r3)", + )); + insns.push(( + Inst::Load32ZExt16 { + rd: writable_gpr(1), + mem: MemArg::BXD12 { + base: gpr(3), + index: gpr(2), + disp: UImm12::maybe_from_u64(4095).unwrap(), + flags: MemFlags::trusted(), + }, + }, + "E3123FFF0095", + "llh %r1, 4095(%r2,%r3)", + )); + insns.push(( + Inst::Load32ZExt16 { + rd: writable_gpr(1), + mem: MemArg::BXD20 { + base: gpr(3), + index: gpr(2), + disp: SImm20::maybe_from_i64(-524288).unwrap(), + flags: MemFlags::trusted(), + }, + }, + "E31230008095", + "llh %r1, -524288(%r2,%r3)", + )); + insns.push(( + Inst::Load32ZExt16 { + rd: writable_gpr(1), + mem: MemArg::BXD20 { + base: gpr(3), + index: gpr(2), + disp: SImm20::maybe_from_i64(524287).unwrap(), + flags: MemFlags::trusted(), + }, + }, + "E3123FFF7F95", + "llh %r1, 524287(%r2,%r3)", + )); + insns.push(( + Inst::Load32SExt16 { + rd: writable_gpr(1), + mem: MemArg::BXD12 { + base: gpr(2), + index: zero_reg(), + disp: UImm12::zero(), + flags: MemFlags::trusted(), + }, + }, + "48102000", + "lh %r1, 0(%r2)", + )); + insns.push(( + Inst::Load32SExt16 { + rd: writable_gpr(1), + mem: MemArg::BXD12 { + base: gpr(2), + index: zero_reg(), + disp: UImm12::maybe_from_u64(4095).unwrap(), + flags: MemFlags::trusted(), + }, + }, + "48102FFF", + "lh %r1, 4095(%r2)", + )); + insns.push(( + Inst::Load32SExt16 { + rd: writable_gpr(1), + mem: MemArg::BXD20 { + base: gpr(2), + index: zero_reg(), + disp: 
SImm20::maybe_from_i64(-524288).unwrap(), + flags: MemFlags::trusted(), + }, + }, + "E31020008078", + "lhy %r1, -524288(%r2)", + )); + insns.push(( + Inst::Load32SExt16 { + rd: writable_gpr(1), + mem: MemArg::BXD20 { + base: gpr(2), + index: zero_reg(), + disp: SImm20::maybe_from_i64(524287).unwrap(), + flags: MemFlags::trusted(), + }, + }, + "E3102FFF7F78", + "lhy %r1, 524287(%r2)", + )); + insns.push(( + Inst::Load32SExt16 { + rd: writable_gpr(1), + mem: MemArg::BXD12 { + base: gpr(3), + index: gpr(2), + disp: UImm12::zero(), + flags: MemFlags::trusted(), + }, + }, + "48123000", + "lh %r1, 0(%r2,%r3)", + )); + insns.push(( + Inst::Load32SExt16 { + rd: writable_gpr(1), + mem: MemArg::BXD12 { + base: gpr(3), + index: gpr(2), + disp: UImm12::maybe_from_u64(4095).unwrap(), + flags: MemFlags::trusted(), + }, + }, + "48123FFF", + "lh %r1, 4095(%r2,%r3)", + )); + insns.push(( + Inst::Load32SExt16 { + rd: writable_gpr(1), + mem: MemArg::BXD20 { + base: gpr(3), + index: gpr(2), + disp: SImm20::maybe_from_i64(-524288).unwrap(), + flags: MemFlags::trusted(), + }, + }, + "E31230008078", + "lhy %r1, -524288(%r2,%r3)", + )); + insns.push(( + Inst::Load32SExt16 { + rd: writable_gpr(1), + mem: MemArg::BXD20 { + base: gpr(3), + index: gpr(2), + disp: SImm20::maybe_from_i64(524287).unwrap(), + flags: MemFlags::trusted(), + }, + }, + "E3123FFF7F78", + "lhy %r1, 524287(%r2,%r3)", + )); + insns.push(( + Inst::Load64 { + rd: writable_gpr(1), + mem: MemArg::BXD12 { + base: gpr(2), + index: zero_reg(), + disp: UImm12::zero(), + flags: MemFlags::trusted(), + }, + }, + "E31020000004", + "lg %r1, 0(%r2)", + )); + insns.push(( + Inst::Load64 { + rd: writable_gpr(1), + mem: MemArg::BXD12 { + base: gpr(2), + index: zero_reg(), + disp: UImm12::maybe_from_u64(4095).unwrap(), + flags: MemFlags::trusted(), + }, + }, + "E3102FFF0004", + "lg %r1, 4095(%r2)", + )); + insns.push(( + Inst::Load64 { + rd: writable_gpr(1), + mem: MemArg::BXD20 { + base: gpr(2), + index: zero_reg(), + disp: SImm20::maybe_from_i64(-524288).unwrap(), + flags: MemFlags::trusted(), + }, + }, + "E31020008004", + "lg %r1, -524288(%r2)", + )); + insns.push(( + Inst::Load64 { + rd: writable_gpr(1), + mem: MemArg::BXD20 { + base: gpr(2), + index: zero_reg(), + disp: SImm20::maybe_from_i64(524287).unwrap(), + flags: MemFlags::trusted(), + }, + }, + "E3102FFF7F04", + "lg %r1, 524287(%r2)", + )); + insns.push(( + Inst::Load64 { + rd: writable_gpr(1), + mem: MemArg::BXD12 { + base: gpr(3), + index: gpr(2), + disp: UImm12::zero(), + flags: MemFlags::trusted(), + }, + }, + "E31230000004", + "lg %r1, 0(%r2,%r3)", + )); + insns.push(( + Inst::Load64 { + rd: writable_gpr(1), + mem: MemArg::BXD12 { + base: gpr(3), + index: gpr(2), + disp: UImm12::maybe_from_u64(4095).unwrap(), + flags: MemFlags::trusted(), + }, + }, + "E3123FFF0004", + "lg %r1, 4095(%r2,%r3)", + )); + insns.push(( + Inst::Load64 { + rd: writable_gpr(1), + mem: MemArg::BXD20 { + base: gpr(3), + index: gpr(2), + disp: SImm20::maybe_from_i64(-524288).unwrap(), + flags: MemFlags::trusted(), + }, + }, + "E31230008004", + "lg %r1, -524288(%r2,%r3)", + )); + insns.push(( + Inst::Load64 { + rd: writable_gpr(1), + mem: MemArg::BXD20 { + base: gpr(3), + index: gpr(2), + disp: SImm20::maybe_from_i64(524287).unwrap(), + flags: MemFlags::trusted(), + }, + }, + "E3123FFF7F04", + "lg %r1, 524287(%r2,%r3)", + )); + insns.push(( + Inst::Load64ZExt8 { + rd: writable_gpr(1), + mem: MemArg::BXD12 { + base: gpr(2), + index: zero_reg(), + disp: UImm12::zero(), + flags: MemFlags::trusted(), + }, + }, + "E31020000090", + 
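+        // note: LLGC (RXY-a, opcode E3..90) zero-extends a byte into a 64-bit GPR.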
"llgc %r1, 0(%r2)", + )); + insns.push(( + Inst::Load64ZExt8 { + rd: writable_gpr(1), + mem: MemArg::BXD12 { + base: gpr(2), + index: zero_reg(), + disp: UImm12::maybe_from_u64(4095).unwrap(), + flags: MemFlags::trusted(), + }, + }, + "E3102FFF0090", + "llgc %r1, 4095(%r2)", + )); + insns.push(( + Inst::Load64ZExt8 { + rd: writable_gpr(1), + mem: MemArg::BXD20 { + base: gpr(2), + index: zero_reg(), + disp: SImm20::maybe_from_i64(-524288).unwrap(), + flags: MemFlags::trusted(), + }, + }, + "E31020008090", + "llgc %r1, -524288(%r2)", + )); + insns.push(( + Inst::Load64ZExt8 { + rd: writable_gpr(1), + mem: MemArg::BXD20 { + base: gpr(2), + index: zero_reg(), + disp: SImm20::maybe_from_i64(524287).unwrap(), + flags: MemFlags::trusted(), + }, + }, + "E3102FFF7F90", + "llgc %r1, 524287(%r2)", + )); + insns.push(( + Inst::Load64ZExt8 { + rd: writable_gpr(1), + mem: MemArg::BXD12 { + base: gpr(3), + index: gpr(2), + disp: UImm12::zero(), + flags: MemFlags::trusted(), + }, + }, + "E31230000090", + "llgc %r1, 0(%r2,%r3)", + )); + insns.push(( + Inst::Load64ZExt8 { + rd: writable_gpr(1), + mem: MemArg::BXD12 { + base: gpr(3), + index: gpr(2), + disp: UImm12::maybe_from_u64(4095).unwrap(), + flags: MemFlags::trusted(), + }, + }, + "E3123FFF0090", + "llgc %r1, 4095(%r2,%r3)", + )); + insns.push(( + Inst::Load64ZExt8 { + rd: writable_gpr(1), + mem: MemArg::BXD20 { + base: gpr(3), + index: gpr(2), + disp: SImm20::maybe_from_i64(-524288).unwrap(), + flags: MemFlags::trusted(), + }, + }, + "E31230008090", + "llgc %r1, -524288(%r2,%r3)", + )); + insns.push(( + Inst::Load64ZExt8 { + rd: writable_gpr(1), + mem: MemArg::BXD20 { + base: gpr(3), + index: gpr(2), + disp: SImm20::maybe_from_i64(524287).unwrap(), + flags: MemFlags::trusted(), + }, + }, + "E3123FFF7F90", + "llgc %r1, 524287(%r2,%r3)", + )); + insns.push(( + Inst::Load64SExt8 { + rd: writable_gpr(1), + mem: MemArg::BXD12 { + base: gpr(2), + index: zero_reg(), + disp: UImm12::zero(), + flags: MemFlags::trusted(), + }, + }, + "E31020000077", + "lgb %r1, 0(%r2)", + )); + insns.push(( + Inst::Load64SExt8 { + rd: writable_gpr(1), + mem: MemArg::BXD12 { + base: gpr(2), + index: zero_reg(), + disp: UImm12::maybe_from_u64(4095).unwrap(), + flags: MemFlags::trusted(), + }, + }, + "E3102FFF0077", + "lgb %r1, 4095(%r2)", + )); + insns.push(( + Inst::Load64SExt8 { + rd: writable_gpr(1), + mem: MemArg::BXD20 { + base: gpr(2), + index: zero_reg(), + disp: SImm20::maybe_from_i64(-524288).unwrap(), + flags: MemFlags::trusted(), + }, + }, + "E31020008077", + "lgb %r1, -524288(%r2)", + )); + insns.push(( + Inst::Load64SExt8 { + rd: writable_gpr(1), + mem: MemArg::BXD20 { + base: gpr(2), + index: zero_reg(), + disp: SImm20::maybe_from_i64(524287).unwrap(), + flags: MemFlags::trusted(), + }, + }, + "E3102FFF7F77", + "lgb %r1, 524287(%r2)", + )); + insns.push(( + Inst::Load64SExt8 { + rd: writable_gpr(1), + mem: MemArg::BXD12 { + base: gpr(3), + index: gpr(2), + disp: UImm12::zero(), + flags: MemFlags::trusted(), + }, + }, + "E31230000077", + "lgb %r1, 0(%r2,%r3)", + )); + insns.push(( + Inst::Load64SExt8 { + rd: writable_gpr(1), + mem: MemArg::BXD12 { + base: gpr(3), + index: gpr(2), + disp: UImm12::maybe_from_u64(4095).unwrap(), + flags: MemFlags::trusted(), + }, + }, + "E3123FFF0077", + "lgb %r1, 4095(%r2,%r3)", + )); + insns.push(( + Inst::Load64SExt8 { + rd: writable_gpr(1), + mem: MemArg::BXD20 { + base: gpr(3), + index: gpr(2), + disp: SImm20::maybe_from_i64(-524288).unwrap(), + flags: MemFlags::trusted(), + }, + }, + "E31230008077", + "lgb %r1, -524288(%r2,%r3)", 
+ )); + insns.push(( + Inst::Load64SExt8 { + rd: writable_gpr(1), + mem: MemArg::BXD20 { + base: gpr(3), + index: gpr(2), + disp: SImm20::maybe_from_i64(524287).unwrap(), + flags: MemFlags::trusted(), + }, + }, + "E3123FFF7F77", + "lgb %r1, 524287(%r2,%r3)", + )); + insns.push(( + Inst::Load64ZExt16 { + rd: writable_gpr(1), + mem: MemArg::BXD12 { + base: gpr(2), + index: zero_reg(), + disp: UImm12::zero(), + flags: MemFlags::trusted(), + }, + }, + "E31020000091", + "llgh %r1, 0(%r2)", + )); + insns.push(( + Inst::Load64ZExt16 { + rd: writable_gpr(1), + mem: MemArg::BXD12 { + base: gpr(2), + index: zero_reg(), + disp: UImm12::maybe_from_u64(4095).unwrap(), + flags: MemFlags::trusted(), + }, + }, + "E3102FFF0091", + "llgh %r1, 4095(%r2)", + )); + insns.push(( + Inst::Load64ZExt16 { + rd: writable_gpr(1), + mem: MemArg::BXD20 { + base: gpr(2), + index: zero_reg(), + disp: SImm20::maybe_from_i64(-524288).unwrap(), + flags: MemFlags::trusted(), + }, + }, + "E31020008091", + "llgh %r1, -524288(%r2)", + )); + insns.push(( + Inst::Load64ZExt16 { + rd: writable_gpr(1), + mem: MemArg::BXD20 { + base: gpr(2), + index: zero_reg(), + disp: SImm20::maybe_from_i64(524287).unwrap(), + flags: MemFlags::trusted(), + }, + }, + "E3102FFF7F91", + "llgh %r1, 524287(%r2)", + )); + insns.push(( + Inst::Load64ZExt16 { + rd: writable_gpr(1), + mem: MemArg::BXD12 { + base: gpr(3), + index: gpr(2), + disp: UImm12::zero(), + flags: MemFlags::trusted(), + }, + }, + "E31230000091", + "llgh %r1, 0(%r2,%r3)", + )); + insns.push(( + Inst::Load64ZExt16 { + rd: writable_gpr(1), + mem: MemArg::BXD12 { + base: gpr(3), + index: gpr(2), + disp: UImm12::maybe_from_u64(4095).unwrap(), + flags: MemFlags::trusted(), + }, + }, + "E3123FFF0091", + "llgh %r1, 4095(%r2,%r3)", + )); + insns.push(( + Inst::Load64ZExt16 { + rd: writable_gpr(1), + mem: MemArg::BXD20 { + base: gpr(3), + index: gpr(2), + disp: SImm20::maybe_from_i64(-524288).unwrap(), + flags: MemFlags::trusted(), + }, + }, + "E31230008091", + "llgh %r1, -524288(%r2,%r3)", + )); + insns.push(( + Inst::Load64ZExt16 { + rd: writable_gpr(1), + mem: MemArg::BXD20 { + base: gpr(3), + index: gpr(2), + disp: SImm20::maybe_from_i64(524287).unwrap(), + flags: MemFlags::trusted(), + }, + }, + "E3123FFF7F91", + "llgh %r1, 524287(%r2,%r3)", + )); + insns.push(( + Inst::Load64SExt16 { + rd: writable_gpr(1), + mem: MemArg::BXD12 { + base: gpr(2), + index: zero_reg(), + disp: UImm12::zero(), + flags: MemFlags::trusted(), + }, + }, + "E31020000015", + "lgh %r1, 0(%r2)", + )); + insns.push(( + Inst::Load64SExt16 { + rd: writable_gpr(1), + mem: MemArg::BXD12 { + base: gpr(2), + index: zero_reg(), + disp: UImm12::maybe_from_u64(4095).unwrap(), + flags: MemFlags::trusted(), + }, + }, + "E3102FFF0015", + "lgh %r1, 4095(%r2)", + )); + insns.push(( + Inst::Load64SExt16 { + rd: writable_gpr(1), + mem: MemArg::BXD20 { + base: gpr(2), + index: zero_reg(), + disp: SImm20::maybe_from_i64(-524288).unwrap(), + flags: MemFlags::trusted(), + }, + }, + "E31020008015", + "lgh %r1, -524288(%r2)", + )); + insns.push(( + Inst::Load64SExt16 { + rd: writable_gpr(1), + mem: MemArg::BXD20 { + base: gpr(2), + index: zero_reg(), + disp: SImm20::maybe_from_i64(524287).unwrap(), + flags: MemFlags::trusted(), + }, + }, + "E3102FFF7F15", + "lgh %r1, 524287(%r2)", + )); + insns.push(( + Inst::Load64SExt16 { + rd: writable_gpr(1), + mem: MemArg::BXD12 { + base: gpr(3), + index: gpr(2), + disp: UImm12::zero(), + flags: MemFlags::trusted(), + }, + }, + "E31230000015", + "lgh %r1, 0(%r2,%r3)", + )); + insns.push(( + 
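+        // note: in the D(X,B) operand syntax the index register prints first, so
+        // index %r2 with base %r3 disassembles as "...(%r2,%r3)".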
Inst::Load64SExt16 { + rd: writable_gpr(1), + mem: MemArg::BXD12 { + base: gpr(3), + index: gpr(2), + disp: UImm12::maybe_from_u64(4095).unwrap(), + flags: MemFlags::trusted(), + }, + }, + "E3123FFF0015", + "lgh %r1, 4095(%r2,%r3)", + )); + insns.push(( + Inst::Load64SExt16 { + rd: writable_gpr(1), + mem: MemArg::BXD20 { + base: gpr(3), + index: gpr(2), + disp: SImm20::maybe_from_i64(-524288).unwrap(), + flags: MemFlags::trusted(), + }, + }, + "E31230008015", + "lgh %r1, -524288(%r2,%r3)", + )); + insns.push(( + Inst::Load64SExt16 { + rd: writable_gpr(1), + mem: MemArg::BXD20 { + base: gpr(3), + index: gpr(2), + disp: SImm20::maybe_from_i64(524287).unwrap(), + flags: MemFlags::trusted(), + }, + }, + "E3123FFF7F15", + "lgh %r1, 524287(%r2,%r3)", + )); + insns.push(( + Inst::Load64ZExt32 { + rd: writable_gpr(1), + mem: MemArg::BXD12 { + base: gpr(2), + index: zero_reg(), + disp: UImm12::zero(), + flags: MemFlags::trusted(), + }, + }, + "E31020000016", + "llgf %r1, 0(%r2)", + )); + insns.push(( + Inst::Load64ZExt32 { + rd: writable_gpr(1), + mem: MemArg::BXD12 { + base: gpr(2), + index: zero_reg(), + disp: UImm12::maybe_from_u64(4095).unwrap(), + flags: MemFlags::trusted(), + }, + }, + "E3102FFF0016", + "llgf %r1, 4095(%r2)", + )); + insns.push(( + Inst::Load64ZExt32 { + rd: writable_gpr(1), + mem: MemArg::BXD20 { + base: gpr(2), + index: zero_reg(), + disp: SImm20::maybe_from_i64(-524288).unwrap(), + flags: MemFlags::trusted(), + }, + }, + "E31020008016", + "llgf %r1, -524288(%r2)", + )); + insns.push(( + Inst::Load64ZExt32 { + rd: writable_gpr(1), + mem: MemArg::BXD20 { + base: gpr(2), + index: zero_reg(), + disp: SImm20::maybe_from_i64(524287).unwrap(), + flags: MemFlags::trusted(), + }, + }, + "E3102FFF7F16", + "llgf %r1, 524287(%r2)", + )); + insns.push(( + Inst::Load64ZExt32 { + rd: writable_gpr(1), + mem: MemArg::BXD12 { + base: gpr(3), + index: gpr(2), + disp: UImm12::zero(), + flags: MemFlags::trusted(), + }, + }, + "E31230000016", + "llgf %r1, 0(%r2,%r3)", + )); + insns.push(( + Inst::Load64ZExt32 { + rd: writable_gpr(1), + mem: MemArg::BXD12 { + base: gpr(3), + index: gpr(2), + disp: UImm12::maybe_from_u64(4095).unwrap(), + flags: MemFlags::trusted(), + }, + }, + "E3123FFF0016", + "llgf %r1, 4095(%r2,%r3)", + )); + insns.push(( + Inst::Load64ZExt32 { + rd: writable_gpr(1), + mem: MemArg::BXD20 { + base: gpr(3), + index: gpr(2), + disp: SImm20::maybe_from_i64(-524288).unwrap(), + flags: MemFlags::trusted(), + }, + }, + "E31230008016", + "llgf %r1, -524288(%r2,%r3)", + )); + insns.push(( + Inst::Load64ZExt32 { + rd: writable_gpr(1), + mem: MemArg::BXD20 { + base: gpr(3), + index: gpr(2), + disp: SImm20::maybe_from_i64(524287).unwrap(), + flags: MemFlags::trusted(), + }, + }, + "E3123FFF7F16", + "llgf %r1, 524287(%r2,%r3)", + )); + insns.push(( + Inst::Load64SExt32 { + rd: writable_gpr(1), + mem: MemArg::BXD12 { + base: gpr(2), + index: zero_reg(), + disp: UImm12::zero(), + flags: MemFlags::trusted(), + }, + }, + "E31020000014", + "lgf %r1, 0(%r2)", + )); + insns.push(( + Inst::Load64SExt32 { + rd: writable_gpr(1), + mem: MemArg::BXD12 { + base: gpr(2), + index: zero_reg(), + disp: UImm12::maybe_from_u64(4095).unwrap(), + flags: MemFlags::trusted(), + }, + }, + "E3102FFF0014", + "lgf %r1, 4095(%r2)", + )); + insns.push(( + Inst::Load64SExt32 { + rd: writable_gpr(1), + mem: MemArg::BXD20 { + base: gpr(2), + index: zero_reg(), + disp: SImm20::maybe_from_i64(-524288).unwrap(), + flags: MemFlags::trusted(), + }, + }, + "E31020008014", + "lgf %r1, -524288(%r2)", + )); + insns.push(( + 
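+        // note: LGF (RXY-a, opcode E3..14) sign-extends a 32-bit word into a 64-bit GPR.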
Inst::Load64SExt32 { + rd: writable_gpr(1), + mem: MemArg::BXD20 { + base: gpr(2), + index: zero_reg(), + disp: SImm20::maybe_from_i64(524287).unwrap(), + flags: MemFlags::trusted(), + }, + }, + "E3102FFF7F14", + "lgf %r1, 524287(%r2)", + )); + insns.push(( + Inst::Load64SExt32 { + rd: writable_gpr(1), + mem: MemArg::BXD12 { + base: gpr(3), + index: gpr(2), + disp: UImm12::zero(), + flags: MemFlags::trusted(), + }, + }, + "E31230000014", + "lgf %r1, 0(%r2,%r3)", + )); + insns.push(( + Inst::Load64SExt32 { + rd: writable_gpr(1), + mem: MemArg::BXD12 { + base: gpr(3), + index: gpr(2), + disp: UImm12::maybe_from_u64(4095).unwrap(), + flags: MemFlags::trusted(), + }, + }, + "E3123FFF0014", + "lgf %r1, 4095(%r2,%r3)", + )); + insns.push(( + Inst::Load64SExt32 { + rd: writable_gpr(1), + mem: MemArg::BXD20 { + base: gpr(3), + index: gpr(2), + disp: SImm20::maybe_from_i64(-524288).unwrap(), + flags: MemFlags::trusted(), + }, + }, + "E31230008014", + "lgf %r1, -524288(%r2,%r3)", + )); + insns.push(( + Inst::Load64SExt32 { + rd: writable_gpr(1), + mem: MemArg::BXD20 { + base: gpr(3), + index: gpr(2), + disp: SImm20::maybe_from_i64(524287).unwrap(), + flags: MemFlags::trusted(), + }, + }, + "E3123FFF7F14", + "lgf %r1, 524287(%r2,%r3)", + )); + + insns.push(( + Inst::Load32 { + rd: writable_gpr(1), + mem: MemArg::Label { + target: BranchTarget::ResolvedOffset(64), + }, + }, + "C41D00000020", + "lrl %r1, 64", + )); + insns.push(( + Inst::Load32SExt16 { + rd: writable_gpr(1), + mem: MemArg::Label { + target: BranchTarget::ResolvedOffset(64), + }, + }, + "C41500000020", + "lhrl %r1, 64", + )); + insns.push(( + Inst::Load32ZExt16 { + rd: writable_gpr(1), + mem: MemArg::Label { + target: BranchTarget::ResolvedOffset(64), + }, + }, + "C41200000020", + "llhrl %r1, 64", + )); + insns.push(( + Inst::Load64 { + rd: writable_gpr(1), + mem: MemArg::Label { + target: BranchTarget::ResolvedOffset(64), + }, + }, + "C41800000020", + "lgrl %r1, 64", + )); + insns.push(( + Inst::Load64SExt16 { + rd: writable_gpr(1), + mem: MemArg::Label { + target: BranchTarget::ResolvedOffset(64), + }, + }, + "C41400000020", + "lghrl %r1, 64", + )); + insns.push(( + Inst::Load64ZExt16 { + rd: writable_gpr(1), + mem: MemArg::Label { + target: BranchTarget::ResolvedOffset(64), + }, + }, + "C41600000020", + "llghrl %r1, 64", + )); + insns.push(( + Inst::Load64SExt32 { + rd: writable_gpr(1), + mem: MemArg::Label { + target: BranchTarget::ResolvedOffset(64), + }, + }, + "C41C00000020", + "lgfrl %r1, 64", + )); + insns.push(( + Inst::Load64ZExt32 { + rd: writable_gpr(1), + mem: MemArg::Label { + target: BranchTarget::ResolvedOffset(64), + }, + }, + "C41E00000020", + "llgfrl %r1, 64", + )); + insns.push(( + Inst::LoadRev16 { + rd: writable_gpr(1), + mem: MemArg::BXD12 { + base: gpr(2), + index: zero_reg(), + disp: UImm12::zero(), + flags: MemFlags::trusted(), + }, + }, + "E3102000001F", + "lrvh %r1, 0(%r2)", + )); + insns.push(( + Inst::LoadRev16 { + rd: writable_gpr(1), + mem: MemArg::BXD12 { + base: gpr(2), + index: zero_reg(), + disp: UImm12::maybe_from_u64(4095).unwrap(), + flags: MemFlags::trusted(), + }, + }, + "E3102FFF001F", + "lrvh %r1, 4095(%r2)", + )); + insns.push(( + Inst::LoadRev16 { + rd: writable_gpr(1), + mem: MemArg::BXD20 { + base: gpr(2), + index: zero_reg(), + disp: SImm20::maybe_from_i64(-524288).unwrap(), + flags: MemFlags::trusted(), + }, + }, + "E3102000801F", + "lrvh %r1, -524288(%r2)", + )); + insns.push(( + Inst::LoadRev16 { + rd: writable_gpr(1), + mem: MemArg::BXD20 { + base: gpr(2), + index: zero_reg(), + disp: 
SImm20::maybe_from_i64(524287).unwrap(), + flags: MemFlags::trusted(), + }, + }, + "E3102FFF7F1F", + "lrvh %r1, 524287(%r2)", + )); + insns.push(( + Inst::LoadRev16 { + rd: writable_gpr(1), + mem: MemArg::BXD12 { + base: gpr(3), + index: gpr(2), + disp: UImm12::zero(), + flags: MemFlags::trusted(), + }, + }, + "E3123000001F", + "lrvh %r1, 0(%r2,%r3)", + )); + insns.push(( + Inst::LoadRev16 { + rd: writable_gpr(1), + mem: MemArg::BXD12 { + base: gpr(3), + index: gpr(2), + disp: UImm12::maybe_from_u64(4095).unwrap(), + flags: MemFlags::trusted(), + }, + }, + "E3123FFF001F", + "lrvh %r1, 4095(%r2,%r3)", + )); + insns.push(( + Inst::LoadRev16 { + rd: writable_gpr(1), + mem: MemArg::BXD20 { + base: gpr(3), + index: gpr(2), + disp: SImm20::maybe_from_i64(-524288).unwrap(), + flags: MemFlags::trusted(), + }, + }, + "E3123000801F", + "lrvh %r1, -524288(%r2,%r3)", + )); + insns.push(( + Inst::LoadRev16 { + rd: writable_gpr(1), + mem: MemArg::BXD20 { + base: gpr(3), + index: gpr(2), + disp: SImm20::maybe_from_i64(524287).unwrap(), + flags: MemFlags::trusted(), + }, + }, + "E3123FFF7F1F", + "lrvh %r1, 524287(%r2,%r3)", + )); + insns.push(( + Inst::LoadRev32 { + rd: writable_gpr(1), + mem: MemArg::BXD12 { + base: gpr(2), + index: zero_reg(), + disp: UImm12::zero(), + flags: MemFlags::trusted(), + }, + }, + "E3102000001E", + "lrv %r1, 0(%r2)", + )); + insns.push(( + Inst::LoadRev32 { + rd: writable_gpr(1), + mem: MemArg::BXD12 { + base: gpr(2), + index: zero_reg(), + disp: UImm12::maybe_from_u64(4095).unwrap(), + flags: MemFlags::trusted(), + }, + }, + "E3102FFF001E", + "lrv %r1, 4095(%r2)", + )); + insns.push(( + Inst::LoadRev32 { + rd: writable_gpr(1), + mem: MemArg::BXD20 { + base: gpr(2), + index: zero_reg(), + disp: SImm20::maybe_from_i64(-524288).unwrap(), + flags: MemFlags::trusted(), + }, + }, + "E3102000801E", + "lrv %r1, -524288(%r2)", + )); + insns.push(( + Inst::LoadRev32 { + rd: writable_gpr(1), + mem: MemArg::BXD20 { + base: gpr(2), + index: zero_reg(), + disp: SImm20::maybe_from_i64(524287).unwrap(), + flags: MemFlags::trusted(), + }, + }, + "E3102FFF7F1E", + "lrv %r1, 524287(%r2)", + )); + insns.push(( + Inst::LoadRev32 { + rd: writable_gpr(1), + mem: MemArg::BXD12 { + base: gpr(3), + index: gpr(2), + disp: UImm12::zero(), + flags: MemFlags::trusted(), + }, + }, + "E3123000001E", + "lrv %r1, 0(%r2,%r3)", + )); + insns.push(( + Inst::LoadRev32 { + rd: writable_gpr(1), + mem: MemArg::BXD12 { + base: gpr(3), + index: gpr(2), + disp: UImm12::maybe_from_u64(4095).unwrap(), + flags: MemFlags::trusted(), + }, + }, + "E3123FFF001E", + "lrv %r1, 4095(%r2,%r3)", + )); + insns.push(( + Inst::LoadRev32 { + rd: writable_gpr(1), + mem: MemArg::BXD20 { + base: gpr(3), + index: gpr(2), + disp: SImm20::maybe_from_i64(-524288).unwrap(), + flags: MemFlags::trusted(), + }, + }, + "E3123000801E", + "lrv %r1, -524288(%r2,%r3)", + )); + insns.push(( + Inst::LoadRev32 { + rd: writable_gpr(1), + mem: MemArg::BXD20 { + base: gpr(3), + index: gpr(2), + disp: SImm20::maybe_from_i64(524287).unwrap(), + flags: MemFlags::trusted(), + }, + }, + "E3123FFF7F1E", + "lrv %r1, 524287(%r2,%r3)", + )); + insns.push(( + Inst::LoadRev64 { + rd: writable_gpr(1), + mem: MemArg::BXD12 { + base: gpr(2), + index: zero_reg(), + disp: UImm12::zero(), + flags: MemFlags::trusted(), + }, + }, + "E3102000000F", + "lrvg %r1, 0(%r2)", + )); + insns.push(( + Inst::LoadRev64 { + rd: writable_gpr(1), + mem: MemArg::BXD12 { + base: gpr(2), + index: zero_reg(), + disp: UImm12::maybe_from_u64(4095).unwrap(), + flags: MemFlags::trusted(), + }, + 
}, + "E3102FFF000F", + "lrvg %r1, 4095(%r2)", + )); + insns.push(( + Inst::LoadRev64 { + rd: writable_gpr(1), + mem: MemArg::BXD20 { + base: gpr(2), + index: zero_reg(), + disp: SImm20::maybe_from_i64(-524288).unwrap(), + flags: MemFlags::trusted(), + }, + }, + "E3102000800F", + "lrvg %r1, -524288(%r2)", + )); + insns.push(( + Inst::LoadRev64 { + rd: writable_gpr(1), + mem: MemArg::BXD20 { + base: gpr(2), + index: zero_reg(), + disp: SImm20::maybe_from_i64(524287).unwrap(), + flags: MemFlags::trusted(), + }, + }, + "E3102FFF7F0F", + "lrvg %r1, 524287(%r2)", + )); + insns.push(( + Inst::LoadRev64 { + rd: writable_gpr(1), + mem: MemArg::BXD12 { + base: gpr(3), + index: gpr(2), + disp: UImm12::zero(), + flags: MemFlags::trusted(), + }, + }, + "E3123000000F", + "lrvg %r1, 0(%r2,%r3)", + )); + insns.push(( + Inst::LoadRev64 { + rd: writable_gpr(1), + mem: MemArg::BXD12 { + base: gpr(3), + index: gpr(2), + disp: UImm12::maybe_from_u64(4095).unwrap(), + flags: MemFlags::trusted(), + }, + }, + "E3123FFF000F", + "lrvg %r1, 4095(%r2,%r3)", + )); + insns.push(( + Inst::LoadRev64 { + rd: writable_gpr(1), + mem: MemArg::BXD20 { + base: gpr(3), + index: gpr(2), + disp: SImm20::maybe_from_i64(-524288).unwrap(), + flags: MemFlags::trusted(), + }, + }, + "E3123000800F", + "lrvg %r1, -524288(%r2,%r3)", + )); + insns.push(( + Inst::LoadRev64 { + rd: writable_gpr(1), + mem: MemArg::BXD20 { + base: gpr(3), + index: gpr(2), + disp: SImm20::maybe_from_i64(524287).unwrap(), + flags: MemFlags::trusted(), + }, + }, + "E3123FFF7F0F", + "lrvg %r1, 524287(%r2,%r3)", + )); + + insns.push(( + Inst::Store8 { + rd: gpr(1), + mem: MemArg::BXD12 { + base: gpr(2), + index: zero_reg(), + disp: UImm12::zero(), + flags: MemFlags::trusted(), + }, + }, + "42102000", + "stc %r1, 0(%r2)", + )); + insns.push(( + Inst::Store8 { + rd: gpr(1), + mem: MemArg::BXD12 { + base: gpr(2), + index: zero_reg(), + disp: UImm12::maybe_from_u64(4095).unwrap(), + flags: MemFlags::trusted(), + }, + }, + "42102FFF", + "stc %r1, 4095(%r2)", + )); + insns.push(( + Inst::Store8 { + rd: gpr(1), + mem: MemArg::BXD20 { + base: gpr(2), + index: zero_reg(), + disp: SImm20::maybe_from_i64(-524288).unwrap(), + flags: MemFlags::trusted(), + }, + }, + "E31020008072", + "stcy %r1, -524288(%r2)", + )); + insns.push(( + Inst::Store8 { + rd: gpr(1), + mem: MemArg::BXD20 { + base: gpr(2), + index: zero_reg(), + disp: SImm20::maybe_from_i64(524287).unwrap(), + flags: MemFlags::trusted(), + }, + }, + "E3102FFF7F72", + "stcy %r1, 524287(%r2)", + )); + insns.push(( + Inst::Store8 { + rd: gpr(1), + mem: MemArg::BXD12 { + base: gpr(3), + index: gpr(2), + disp: UImm12::zero(), + flags: MemFlags::trusted(), + }, + }, + "42123000", + "stc %r1, 0(%r2,%r3)", + )); + insns.push(( + Inst::Store8 { + rd: gpr(1), + mem: MemArg::BXD12 { + base: gpr(3), + index: gpr(2), + disp: UImm12::maybe_from_u64(4095).unwrap(), + flags: MemFlags::trusted(), + }, + }, + "42123FFF", + "stc %r1, 4095(%r2,%r3)", + )); + insns.push(( + Inst::Store8 { + rd: gpr(1), + mem: MemArg::BXD20 { + base: gpr(3), + index: gpr(2), + disp: SImm20::maybe_from_i64(-524288).unwrap(), + flags: MemFlags::trusted(), + }, + }, + "E31230008072", + "stcy %r1, -524288(%r2,%r3)", + )); + insns.push(( + Inst::Store8 { + rd: gpr(1), + mem: MemArg::BXD20 { + base: gpr(3), + index: gpr(2), + disp: SImm20::maybe_from_i64(524287).unwrap(), + flags: MemFlags::trusted(), + }, + }, + "E3123FFF7F72", + "stcy %r1, 524287(%r2,%r3)", + )); + insns.push(( + Inst::Store16 { + rd: gpr(1), + mem: MemArg::BXD12 { + base: gpr(2), + index: 
zero_reg(), + disp: UImm12::zero(), + flags: MemFlags::trusted(), + }, + }, + "40102000", + "sth %r1, 0(%r2)", + )); + insns.push(( + Inst::Store16 { + rd: gpr(1), + mem: MemArg::BXD12 { + base: gpr(2), + index: zero_reg(), + disp: UImm12::maybe_from_u64(4095).unwrap(), + flags: MemFlags::trusted(), + }, + }, + "40102FFF", + "sth %r1, 4095(%r2)", + )); + insns.push(( + Inst::Store16 { + rd: gpr(1), + mem: MemArg::BXD20 { + base: gpr(2), + index: zero_reg(), + disp: SImm20::maybe_from_i64(-524288).unwrap(), + flags: MemFlags::trusted(), + }, + }, + "E31020008070", + "sthy %r1, -524288(%r2)", + )); + insns.push(( + Inst::Store16 { + rd: gpr(1), + mem: MemArg::BXD20 { + base: gpr(2), + index: zero_reg(), + disp: SImm20::maybe_from_i64(524287).unwrap(), + flags: MemFlags::trusted(), + }, + }, + "E3102FFF7F70", + "sthy %r1, 524287(%r2)", + )); + insns.push(( + Inst::Store16 { + rd: gpr(1), + mem: MemArg::BXD12 { + base: gpr(3), + index: gpr(2), + disp: UImm12::zero(), + flags: MemFlags::trusted(), + }, + }, + "40123000", + "sth %r1, 0(%r2,%r3)", + )); + insns.push(( + Inst::Store16 { + rd: gpr(1), + mem: MemArg::BXD12 { + base: gpr(3), + index: gpr(2), + disp: UImm12::maybe_from_u64(4095).unwrap(), + flags: MemFlags::trusted(), + }, + }, + "40123FFF", + "sth %r1, 4095(%r2,%r3)", + )); + insns.push(( + Inst::Store16 { + rd: gpr(1), + mem: MemArg::BXD20 { + base: gpr(3), + index: gpr(2), + disp: SImm20::maybe_from_i64(-524288).unwrap(), + flags: MemFlags::trusted(), + }, + }, + "E31230008070", + "sthy %r1, -524288(%r2,%r3)", + )); + insns.push(( + Inst::Store16 { + rd: gpr(1), + mem: MemArg::BXD20 { + base: gpr(3), + index: gpr(2), + disp: SImm20::maybe_from_i64(524287).unwrap(), + flags: MemFlags::trusted(), + }, + }, + "E3123FFF7F70", + "sthy %r1, 524287(%r2,%r3)", + )); + insns.push(( + Inst::Store32 { + rd: gpr(1), + mem: MemArg::BXD12 { + base: gpr(2), + index: zero_reg(), + disp: UImm12::zero(), + flags: MemFlags::trusted(), + }, + }, + "50102000", + "st %r1, 0(%r2)", + )); + insns.push(( + Inst::Store32 { + rd: gpr(1), + mem: MemArg::BXD12 { + base: gpr(2), + index: zero_reg(), + disp: UImm12::maybe_from_u64(4095).unwrap(), + flags: MemFlags::trusted(), + }, + }, + "50102FFF", + "st %r1, 4095(%r2)", + )); + insns.push(( + Inst::Store32 { + rd: gpr(1), + mem: MemArg::BXD20 { + base: gpr(2), + index: zero_reg(), + disp: SImm20::maybe_from_i64(-524288).unwrap(), + flags: MemFlags::trusted(), + }, + }, + "E31020008050", + "sty %r1, -524288(%r2)", + )); + insns.push(( + Inst::Store32 { + rd: gpr(1), + mem: MemArg::BXD20 { + base: gpr(2), + index: zero_reg(), + disp: SImm20::maybe_from_i64(524287).unwrap(), + flags: MemFlags::trusted(), + }, + }, + "E3102FFF7F50", + "sty %r1, 524287(%r2)", + )); + insns.push(( + Inst::Store32 { + rd: gpr(1), + mem: MemArg::BXD12 { + base: gpr(3), + index: gpr(2), + disp: UImm12::zero(), + flags: MemFlags::trusted(), + }, + }, + "50123000", + "st %r1, 0(%r2,%r3)", + )); + insns.push(( + Inst::Store32 { + rd: gpr(1), + mem: MemArg::BXD12 { + base: gpr(3), + index: gpr(2), + disp: UImm12::maybe_from_u64(4095).unwrap(), + flags: MemFlags::trusted(), + }, + }, + "50123FFF", + "st %r1, 4095(%r2,%r3)", + )); + insns.push(( + Inst::Store32 { + rd: gpr(1), + mem: MemArg::BXD20 { + base: gpr(3), + index: gpr(2), + disp: SImm20::maybe_from_i64(-524288).unwrap(), + flags: MemFlags::trusted(), + }, + }, + "E31230008050", + "sty %r1, -524288(%r2,%r3)", + )); + insns.push(( + Inst::Store32 { + rd: gpr(1), + mem: MemArg::BXD20 { + base: gpr(3), + index: gpr(2), + disp: 
SImm20::maybe_from_i64(524287).unwrap(), + flags: MemFlags::trusted(), + }, + }, + "E3123FFF7F50", + "sty %r1, 524287(%r2,%r3)", + )); + insns.push(( + Inst::Store64 { + rd: gpr(1), + mem: MemArg::BXD12 { + base: gpr(2), + index: zero_reg(), + disp: UImm12::zero(), + flags: MemFlags::trusted(), + }, + }, + "E31020000024", + "stg %r1, 0(%r2)", + )); + insns.push(( + Inst::Store64 { + rd: gpr(1), + mem: MemArg::BXD12 { + base: gpr(2), + index: zero_reg(), + disp: UImm12::maybe_from_u64(4095).unwrap(), + flags: MemFlags::trusted(), + }, + }, + "E3102FFF0024", + "stg %r1, 4095(%r2)", + )); + insns.push(( + Inst::Store64 { + rd: gpr(1), + mem: MemArg::BXD20 { + base: gpr(2), + index: zero_reg(), + disp: SImm20::maybe_from_i64(-524288).unwrap(), + flags: MemFlags::trusted(), + }, + }, + "E31020008024", + "stg %r1, -524288(%r2)", + )); + insns.push(( + Inst::Store64 { + rd: gpr(1), + mem: MemArg::BXD20 { + base: gpr(2), + index: zero_reg(), + disp: SImm20::maybe_from_i64(524287).unwrap(), + flags: MemFlags::trusted(), + }, + }, + "E3102FFF7F24", + "stg %r1, 524287(%r2)", + )); + insns.push(( + Inst::Store64 { + rd: gpr(1), + mem: MemArg::BXD12 { + base: gpr(3), + index: gpr(2), + disp: UImm12::zero(), + flags: MemFlags::trusted(), + }, + }, + "E31230000024", + "stg %r1, 0(%r2,%r3)", + )); + insns.push(( + Inst::Store64 { + rd: gpr(1), + mem: MemArg::BXD12 { + base: gpr(3), + index: gpr(2), + disp: UImm12::maybe_from_u64(4095).unwrap(), + flags: MemFlags::trusted(), + }, + }, + "E3123FFF0024", + "stg %r1, 4095(%r2,%r3)", + )); + insns.push(( + Inst::Store64 { + rd: gpr(1), + mem: MemArg::BXD20 { + base: gpr(3), + index: gpr(2), + disp: SImm20::maybe_from_i64(-524288).unwrap(), + flags: MemFlags::trusted(), + }, + }, + "E31230008024", + "stg %r1, -524288(%r2,%r3)", + )); + insns.push(( + Inst::Store64 { + rd: gpr(1), + mem: MemArg::BXD20 { + base: gpr(3), + index: gpr(2), + disp: SImm20::maybe_from_i64(524287).unwrap(), + flags: MemFlags::trusted(), + }, + }, + "E3123FFF7F24", + "stg %r1, 524287(%r2,%r3)", + )); + + insns.push(( + Inst::StoreImm8 { + imm: 255, + mem: MemArg::BXD12 { + base: gpr(2), + index: zero_reg(), + disp: UImm12::zero(), + flags: MemFlags::trusted(), + }, + }, + "92FF2000", + "mvi 0(%r2), 255", + )); + insns.push(( + Inst::StoreImm8 { + imm: 0, + mem: MemArg::BXD12 { + base: gpr(2), + index: zero_reg(), + disp: UImm12::maybe_from_u64(4095).unwrap(), + flags: MemFlags::trusted(), + }, + }, + "92002FFF", + "mvi 4095(%r2), 0", + )); + insns.push(( + Inst::StoreImm8 { + imm: 255, + mem: MemArg::BXD20 { + base: gpr(2), + index: zero_reg(), + disp: SImm20::maybe_from_i64(-524288).unwrap(), + flags: MemFlags::trusted(), + }, + }, + "EBFF20008052", + "mviy -524288(%r2), 255", + )); + insns.push(( + Inst::StoreImm8 { + imm: 0, + mem: MemArg::BXD20 { + base: gpr(2), + index: zero_reg(), + disp: SImm20::maybe_from_i64(524287).unwrap(), + flags: MemFlags::trusted(), + }, + }, + "EB002FFF7F52", + "mviy 524287(%r2), 0", + )); + insns.push(( + Inst::StoreImm16 { + imm: -32768, + mem: MemArg::BXD12 { + base: gpr(2), + index: zero_reg(), + disp: UImm12::zero(), + flags: MemFlags::trusted(), + }, + }, + "E54420008000", + "mvhhi 0(%r2), -32768", + )); + insns.push(( + Inst::StoreImm16 { + imm: 32767, + mem: MemArg::BXD12 { + base: gpr(2), + index: zero_reg(), + disp: UImm12::maybe_from_u64(4095).unwrap(), + flags: MemFlags::trusted(), + }, + }, + "E5442FFF7FFF", + "mvhhi 4095(%r2), 32767", + )); + insns.push(( + Inst::StoreImm32SExt16 { + imm: -32768, + mem: MemArg::BXD12 { + base: gpr(2), + 
index: zero_reg(), + disp: UImm12::zero(), + flags: MemFlags::trusted(), + }, + }, + "E54C20008000", + "mvhi 0(%r2), -32768", + )); + insns.push(( + Inst::StoreImm32SExt16 { + imm: 32767, + mem: MemArg::BXD12 { + base: gpr(2), + index: zero_reg(), + disp: UImm12::maybe_from_u64(4095).unwrap(), + flags: MemFlags::trusted(), + }, + }, + "E54C2FFF7FFF", + "mvhi 4095(%r2), 32767", + )); + insns.push(( + Inst::StoreImm64SExt16 { + imm: -32768, + mem: MemArg::BXD12 { + base: gpr(2), + index: zero_reg(), + disp: UImm12::zero(), + flags: MemFlags::trusted(), + }, + }, + "E54820008000", + "mvghi 0(%r2), -32768", + )); + insns.push(( + Inst::StoreImm64SExt16 { + imm: 32767, + mem: MemArg::BXD12 { + base: gpr(2), + index: zero_reg(), + disp: UImm12::maybe_from_u64(4095).unwrap(), + flags: MemFlags::trusted(), + }, + }, + "E5482FFF7FFF", + "mvghi 4095(%r2), 32767", + )); + + insns.push(( + Inst::StoreRev16 { + rd: gpr(1), + mem: MemArg::BXD12 { + base: gpr(2), + index: zero_reg(), + disp: UImm12::zero(), + flags: MemFlags::trusted(), + }, + }, + "E3102000003F", + "strvh %r1, 0(%r2)", + )); + insns.push(( + Inst::StoreRev16 { + rd: gpr(1), + mem: MemArg::BXD12 { + base: gpr(2), + index: zero_reg(), + disp: UImm12::maybe_from_u64(4095).unwrap(), + flags: MemFlags::trusted(), + }, + }, + "E3102FFF003F", + "strvh %r1, 4095(%r2)", + )); + insns.push(( + Inst::StoreRev16 { + rd: gpr(1), + mem: MemArg::BXD20 { + base: gpr(2), + index: zero_reg(), + disp: SImm20::maybe_from_i64(-524288).unwrap(), + flags: MemFlags::trusted(), + }, + }, + "E3102000803F", + "strvh %r1, -524288(%r2)", + )); + insns.push(( + Inst::StoreRev16 { + rd: gpr(1), + mem: MemArg::BXD20 { + base: gpr(2), + index: zero_reg(), + disp: SImm20::maybe_from_i64(524287).unwrap(), + flags: MemFlags::trusted(), + }, + }, + "E3102FFF7F3F", + "strvh %r1, 524287(%r2)", + )); + insns.push(( + Inst::StoreRev16 { + rd: gpr(1), + mem: MemArg::BXD12 { + base: gpr(3), + index: gpr(2), + disp: UImm12::zero(), + flags: MemFlags::trusted(), + }, + }, + "E3123000003F", + "strvh %r1, 0(%r2,%r3)", + )); + insns.push(( + Inst::StoreRev16 { + rd: gpr(1), + mem: MemArg::BXD12 { + base: gpr(3), + index: gpr(2), + disp: UImm12::maybe_from_u64(4095).unwrap(), + flags: MemFlags::trusted(), + }, + }, + "E3123FFF003F", + "strvh %r1, 4095(%r2,%r3)", + )); + insns.push(( + Inst::StoreRev16 { + rd: gpr(1), + mem: MemArg::BXD20 { + base: gpr(3), + index: gpr(2), + disp: SImm20::maybe_from_i64(-524288).unwrap(), + flags: MemFlags::trusted(), + }, + }, + "E3123000803F", + "strvh %r1, -524288(%r2,%r3)", + )); + insns.push(( + Inst::StoreRev16 { + rd: gpr(1), + mem: MemArg::BXD20 { + base: gpr(3), + index: gpr(2), + disp: SImm20::maybe_from_i64(524287).unwrap(), + flags: MemFlags::trusted(), + }, + }, + "E3123FFF7F3F", + "strvh %r1, 524287(%r2,%r3)", + )); + insns.push(( + Inst::StoreRev32 { + rd: gpr(1), + mem: MemArg::BXD12 { + base: gpr(2), + index: zero_reg(), + disp: UImm12::zero(), + flags: MemFlags::trusted(), + }, + }, + "E3102000003E", + "strv %r1, 0(%r2)", + )); + insns.push(( + Inst::StoreRev32 { + rd: gpr(1), + mem: MemArg::BXD12 { + base: gpr(2), + index: zero_reg(), + disp: UImm12::maybe_from_u64(4095).unwrap(), + flags: MemFlags::trusted(), + }, + }, + "E3102FFF003E", + "strv %r1, 4095(%r2)", + )); + insns.push(( + Inst::StoreRev32 { + rd: gpr(1), + mem: MemArg::BXD20 { + base: gpr(2), + index: zero_reg(), + disp: SImm20::maybe_from_i64(-524288).unwrap(), + flags: MemFlags::trusted(), + }, + }, + "E3102000803E", + "strv %r1, -524288(%r2)", + )); + insns.push(( + 
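+        // note: STRV (RXY-a, opcode E3..3E) is a byte-reversed 32-bit store; the
+        // displacements in this group probe the UImm12 and SImm20 bounds.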
Inst::StoreRev32 { + rd: gpr(1), + mem: MemArg::BXD20 { + base: gpr(2), + index: zero_reg(), + disp: SImm20::maybe_from_i64(524287).unwrap(), + flags: MemFlags::trusted(), + }, + }, + "E3102FFF7F3E", + "strv %r1, 524287(%r2)", + )); + insns.push(( + Inst::StoreRev32 { + rd: gpr(1), + mem: MemArg::BXD12 { + base: gpr(3), + index: gpr(2), + disp: UImm12::zero(), + flags: MemFlags::trusted(), + }, + }, + "E3123000003E", + "strv %r1, 0(%r2,%r3)", + )); + insns.push(( + Inst::StoreRev32 { + rd: gpr(1), + mem: MemArg::BXD12 { + base: gpr(3), + index: gpr(2), + disp: UImm12::maybe_from_u64(4095).unwrap(), + flags: MemFlags::trusted(), + }, + }, + "E3123FFF003E", + "strv %r1, 4095(%r2,%r3)", + )); + insns.push(( + Inst::StoreRev32 { + rd: gpr(1), + mem: MemArg::BXD20 { + base: gpr(3), + index: gpr(2), + disp: SImm20::maybe_from_i64(-524288).unwrap(), + flags: MemFlags::trusted(), + }, + }, + "E3123000803E", + "strv %r1, -524288(%r2,%r3)", + )); + insns.push(( + Inst::StoreRev32 { + rd: gpr(1), + mem: MemArg::BXD20 { + base: gpr(3), + index: gpr(2), + disp: SImm20::maybe_from_i64(524287).unwrap(), + flags: MemFlags::trusted(), + }, + }, + "E3123FFF7F3E", + "strv %r1, 524287(%r2,%r3)", + )); + insns.push(( + Inst::StoreRev64 { + rd: gpr(1), + mem: MemArg::BXD12 { + base: gpr(2), + index: zero_reg(), + disp: UImm12::zero(), + flags: MemFlags::trusted(), + }, + }, + "E3102000002F", + "strvg %r1, 0(%r2)", + )); + insns.push(( + Inst::StoreRev64 { + rd: gpr(1), + mem: MemArg::BXD12 { + base: gpr(2), + index: zero_reg(), + disp: UImm12::maybe_from_u64(4095).unwrap(), + flags: MemFlags::trusted(), + }, + }, + "E3102FFF002F", + "strvg %r1, 4095(%r2)", + )); + insns.push(( + Inst::StoreRev64 { + rd: gpr(1), + mem: MemArg::BXD20 { + base: gpr(2), + index: zero_reg(), + disp: SImm20::maybe_from_i64(-524288).unwrap(), + flags: MemFlags::trusted(), + }, + }, + "E3102000802F", + "strvg %r1, -524288(%r2)", + )); + insns.push(( + Inst::StoreRev64 { + rd: gpr(1), + mem: MemArg::BXD20 { + base: gpr(2), + index: zero_reg(), + disp: SImm20::maybe_from_i64(524287).unwrap(), + flags: MemFlags::trusted(), + }, + }, + "E3102FFF7F2F", + "strvg %r1, 524287(%r2)", + )); + insns.push(( + Inst::StoreRev64 { + rd: gpr(1), + mem: MemArg::BXD12 { + base: gpr(3), + index: gpr(2), + disp: UImm12::zero(), + flags: MemFlags::trusted(), + }, + }, + "E3123000002F", + "strvg %r1, 0(%r2,%r3)", + )); + insns.push(( + Inst::StoreRev64 { + rd: gpr(1), + mem: MemArg::BXD12 { + base: gpr(3), + index: gpr(2), + disp: UImm12::maybe_from_u64(4095).unwrap(), + flags: MemFlags::trusted(), + }, + }, + "E3123FFF002F", + "strvg %r1, 4095(%r2,%r3)", + )); + insns.push(( + Inst::StoreRev64 { + rd: gpr(1), + mem: MemArg::BXD20 { + base: gpr(3), + index: gpr(2), + disp: SImm20::maybe_from_i64(-524288).unwrap(), + flags: MemFlags::trusted(), + }, + }, + "E3123000802F", + "strvg %r1, -524288(%r2,%r3)", + )); + insns.push(( + Inst::StoreRev64 { + rd: gpr(1), + mem: MemArg::BXD20 { + base: gpr(3), + index: gpr(2), + disp: SImm20::maybe_from_i64(524287).unwrap(), + flags: MemFlags::trusted(), + }, + }, + "E3123FFF7F2F", + "strvg %r1, 524287(%r2,%r3)", + )); + + insns.push(( + Inst::Store16 { + rd: gpr(1), + mem: MemArg::Label { + target: BranchTarget::ResolvedOffset(64), + }, + }, + "C41700000020", + "sthrl %r1, 64", + )); + insns.push(( + Inst::Store32 { + rd: gpr(1), + mem: MemArg::Label { + target: BranchTarget::ResolvedOffset(64), + }, + }, + "C41F00000020", + "strl %r1, 64", + )); + insns.push(( + Inst::Store64 { + rd: gpr(1), + mem: MemArg::Label { + 
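+                // note: the RIL relative offset is counted in halfwords, so a 64-byte
+                // offset encodes as 0x20 (the trailing "...0020" in the expected bytes).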
target: BranchTarget::ResolvedOffset(64), + }, + }, + "C41B00000020", + "stgrl %r1, 64", + )); + + insns.push(( + Inst::LoadMultiple64 { + rt: writable_gpr(8), + rt2: writable_gpr(12), + addr_reg: gpr(15), + addr_off: SImm20::maybe_from_i64(-524288).unwrap(), + }, + "EB8CF0008004", + "lmg %r8, %r12, -524288(%r15)", + )); + insns.push(( + Inst::LoadMultiple64 { + rt: writable_gpr(8), + rt2: writable_gpr(12), + addr_reg: gpr(15), + addr_off: SImm20::maybe_from_i64(524287).unwrap(), + }, + "EB8CFFFF7F04", + "lmg %r8, %r12, 524287(%r15)", + )); + + insns.push(( + Inst::StoreMultiple64 { + rt: gpr(8), + rt2: gpr(12), + addr_reg: gpr(15), + addr_off: SImm20::maybe_from_i64(-524288).unwrap(), + }, + "EB8CF0008024", + "stmg %r8, %r12, -524288(%r15)", + )); + insns.push(( + Inst::StoreMultiple64 { + rt: gpr(8), + rt2: gpr(12), + addr_reg: gpr(15), + addr_off: SImm20::maybe_from_i64(524287).unwrap(), + }, + "EB8CFFFF7F24", + "stmg %r8, %r12, 524287(%r15)", + )); + + insns.push(( + Inst::LoadAddr { + rd: writable_gpr(1), + mem: MemArg::BXD12 { + base: zero_reg(), + index: zero_reg(), + disp: UImm12::zero(), + flags: MemFlags::trusted(), + }, + }, + "41100000", + "la %r1, 0", + )); + insns.push(( + Inst::LoadAddr { + rd: writable_gpr(1), + mem: MemArg::BXD12 { + base: zero_reg(), + index: zero_reg(), + disp: UImm12::maybe_from_u64(4095).unwrap(), + flags: MemFlags::trusted(), + }, + }, + "41100FFF", + "la %r1, 4095", + )); + insns.push(( + Inst::LoadAddr { + rd: writable_gpr(1), + mem: MemArg::BXD20 { + base: zero_reg(), + index: zero_reg(), + disp: SImm20::maybe_from_i64(-524288).unwrap(), + flags: MemFlags::trusted(), + }, + }, + "E31000008071", + "lay %r1, -524288", + )); + insns.push(( + Inst::LoadAddr { + rd: writable_gpr(1), + mem: MemArg::BXD20 { + base: zero_reg(), + index: zero_reg(), + disp: SImm20::maybe_from_i64(524287).unwrap(), + flags: MemFlags::trusted(), + }, + }, + "E3100FFF7F71", + "lay %r1, 524287", + )); + insns.push(( + Inst::LoadAddr { + rd: writable_gpr(1), + mem: MemArg::BXD12 { + base: gpr(2), + index: zero_reg(), + disp: UImm12::zero(), + flags: MemFlags::trusted(), + }, + }, + "41102000", + "la %r1, 0(%r2)", + )); + insns.push(( + Inst::LoadAddr { + rd: writable_gpr(1), + mem: MemArg::BXD12 { + base: gpr(2), + index: zero_reg(), + disp: UImm12::maybe_from_u64(4095).unwrap(), + flags: MemFlags::trusted(), + }, + }, + "41102FFF", + "la %r1, 4095(%r2)", + )); + insns.push(( + Inst::LoadAddr { + rd: writable_gpr(1), + mem: MemArg::BXD20 { + base: gpr(2), + index: zero_reg(), + disp: SImm20::maybe_from_i64(-524288).unwrap(), + flags: MemFlags::trusted(), + }, + }, + "E31020008071", + "lay %r1, -524288(%r2)", + )); + insns.push(( + Inst::LoadAddr { + rd: writable_gpr(1), + mem: MemArg::BXD20 { + base: gpr(2), + index: zero_reg(), + disp: SImm20::maybe_from_i64(524287).unwrap(), + flags: MemFlags::trusted(), + }, + }, + "E3102FFF7F71", + "lay %r1, 524287(%r2)", + )); + insns.push(( + Inst::LoadAddr { + rd: writable_gpr(1), + mem: MemArg::BXD12 { + base: gpr(3), + index: gpr(2), + disp: UImm12::zero(), + flags: MemFlags::trusted(), + }, + }, + "41123000", + "la %r1, 0(%r2,%r3)", + )); + insns.push(( + Inst::LoadAddr { + rd: writable_gpr(1), + mem: MemArg::BXD12 { + base: gpr(3), + index: gpr(2), + disp: UImm12::maybe_from_u64(4095).unwrap(), + flags: MemFlags::trusted(), + }, + }, + "41123FFF", + "la %r1, 4095(%r2,%r3)", + )); + insns.push(( + Inst::LoadAddr { + rd: writable_gpr(1), + mem: MemArg::BXD20 { + base: gpr(3), + index: gpr(2), + disp: 
SImm20::maybe_from_i64(-524288).unwrap(), + flags: MemFlags::trusted(), + }, + }, + "E31230008071", + "lay %r1, -524288(%r2,%r3)", + )); + insns.push(( + Inst::LoadAddr { + rd: writable_gpr(1), + mem: MemArg::BXD20 { + base: gpr(3), + index: gpr(2), + disp: SImm20::maybe_from_i64(524287).unwrap(), + flags: MemFlags::trusted(), + }, + }, + "E3123FFF7F71", + "lay %r1, 524287(%r2,%r3)", + )); + insns.push(( + Inst::LoadAddr { + rd: writable_gpr(1), + mem: MemArg::Label { + target: BranchTarget::ResolvedOffset(64), + }, + }, + "C01000000020", + "larl %r1, 64", + )); + insns.push(( + Inst::LoadAddr { + rd: writable_gpr(1), + mem: MemArg::Symbol { + name: Box::new(ExternalName::testcase("test0")), + offset: 64, + flags: MemFlags::trusted(), + }, + }, + "C01000000000", + "larl %r1, %test0 + 64", + )); + + insns.push(( + Inst::LoadAddr { + rd: writable_gpr(1), + mem: MemArg::RegOffset { + reg: gpr(2), + off: 0, + flags: MemFlags::trusted(), + }, + }, + "41102000", + "la %r1, 0(%r2)", + )); + insns.push(( + Inst::LoadAddr { + rd: writable_gpr(1), + mem: MemArg::RegOffset { + reg: gpr(2), + off: 4095, + flags: MemFlags::trusted(), + }, + }, + "41102FFF", + "la %r1, 4095(%r2)", + )); + insns.push(( + Inst::LoadAddr { + rd: writable_gpr(1), + mem: MemArg::RegOffset { + reg: gpr(2), + off: -524288, + flags: MemFlags::trusted(), + }, + }, + "E31020008071", + "lay %r1, -524288(%r2)", + )); + insns.push(( + Inst::LoadAddr { + rd: writable_gpr(1), + mem: MemArg::RegOffset { + reg: gpr(2), + off: 524287, + flags: MemFlags::trusted(), + }, + }, + "E3102FFF7F71", + "lay %r1, 524287(%r2)", + )); + insns.push(( + Inst::LoadAddr { + rd: writable_gpr(1), + mem: MemArg::RegOffset { + reg: gpr(2), + off: -2147483648, + flags: MemFlags::trusted(), + }, + }, + "C0118000000041112000", + "lgfi %r1, -2147483648 ; la %r1, 0(%r1,%r2)", + )); + insns.push(( + Inst::LoadAddr { + rd: writable_gpr(1), + mem: MemArg::RegOffset { + reg: gpr(2), + off: 2147483647, + flags: MemFlags::trusted(), + }, + }, + "C0117FFFFFFF41112000", + "lgfi %r1, 2147483647 ; la %r1, 0(%r1,%r2)", + )); + insns.push(( + Inst::LoadAddr { + rd: writable_gpr(1), + mem: MemArg::RegOffset { + reg: gpr(2), + off: -9223372036854775808, + flags: MemFlags::trusted(), + }, + }, + "A51C800041112000", + "llihh %r1, 32768 ; la %r1, 0(%r1,%r2)", + )); + insns.push(( + Inst::LoadAddr { + rd: writable_gpr(1), + mem: MemArg::RegOffset { + reg: gpr(2), + off: 9223372036854775807, + flags: MemFlags::trusted(), + }, + }, + "C01E7FFFFFFFC019FFFFFFFF41112000", + "llihf %r1, 2147483647 ; iilf %r1, 4294967295 ; la %r1, 0(%r1,%r2)", + )); + + insns.push(( + Inst::Mov64 { + rd: writable_gpr(8), + rm: gpr(9), + }, + "B9040089", + "lgr %r8, %r9", + )); + insns.push(( + Inst::Mov32 { + rd: writable_gpr(8), + rm: gpr(9), + }, + "1889", + "lr %r8, %r9", + )); + + insns.push(( + Inst::Mov32SImm16 { + rd: writable_gpr(8), + imm: -32768, + }, + "A7888000", + "lhi %r8, -32768", + )); + insns.push(( + Inst::Mov32SImm16 { + rd: writable_gpr(8), + imm: 32767, + }, + "A7887FFF", + "lhi %r8, 32767", + )); + insns.push(( + Inst::Mov32Imm { + rd: writable_gpr(8), + imm: 2147483648, + }, + "C08980000000", + "iilf %r8, 2147483648", + )); + insns.push(( + Inst::Mov32Imm { + rd: writable_gpr(8), + imm: 2147483647, + }, + "C0897FFFFFFF", + "iilf %r8, 2147483647", + )); + insns.push(( + Inst::Mov64SImm16 { + rd: writable_gpr(8), + imm: -32768, + }, + "A7898000", + "lghi %r8, -32768", + )); + insns.push(( + Inst::Mov64SImm16 { + rd: writable_gpr(8), + imm: 32767, + }, + "A7897FFF", + "lghi %r8, 
32767", + )); + insns.push(( + Inst::Mov64SImm32 { + rd: writable_gpr(8), + imm: -2147483648, + }, + "C08180000000", + "lgfi %r8, -2147483648", + )); + insns.push(( + Inst::Mov64SImm32 { + rd: writable_gpr(8), + imm: 2147483647, + }, + "C0817FFFFFFF", + "lgfi %r8, 2147483647", + )); + insns.push(( + Inst::Mov64UImm16Shifted { + rd: writable_gpr(8), + imm: UImm16Shifted::maybe_from_u64(0x0000_0000_0000_ffff).unwrap(), + }, + "A58FFFFF", + "llill %r8, 65535", + )); + insns.push(( + Inst::Mov64UImm16Shifted { + rd: writable_gpr(8), + imm: UImm16Shifted::maybe_from_u64(0x0000_0000_ffff_0000).unwrap(), + }, + "A58EFFFF", + "llilh %r8, 65535", + )); + insns.push(( + Inst::Mov64UImm16Shifted { + rd: writable_gpr(8), + imm: UImm16Shifted::maybe_from_u64(0x0000_ffff_0000_0000).unwrap(), + }, + "A58DFFFF", + "llihl %r8, 65535", + )); + insns.push(( + Inst::Mov64UImm16Shifted { + rd: writable_gpr(8), + imm: UImm16Shifted::maybe_from_u64(0xffff_0000_0000_0000).unwrap(), + }, + "A58CFFFF", + "llihh %r8, 65535", + )); + insns.push(( + Inst::Mov64UImm32Shifted { + rd: writable_gpr(8), + imm: UImm32Shifted::maybe_from_u64(0x0000_0000_ffff_ffff).unwrap(), + }, + "C08FFFFFFFFF", + "llilf %r8, 4294967295", + )); + insns.push(( + Inst::Mov64UImm32Shifted { + rd: writable_gpr(8), + imm: UImm32Shifted::maybe_from_u64(0xffff_ffff_0000_0000).unwrap(), + }, + "C08EFFFFFFFF", + "llihf %r8, 4294967295", + )); + + insns.push(( + Inst::Insert64UImm16Shifted { + rd: writable_gpr(8), + imm: UImm16Shifted::maybe_from_u64(0x0000_0000_0000_ffff).unwrap(), + }, + "A583FFFF", + "iill %r8, 65535", + )); + insns.push(( + Inst::Insert64UImm16Shifted { + rd: writable_gpr(8), + imm: UImm16Shifted::maybe_from_u64(0x0000_0000_ffff_0000).unwrap(), + }, + "A582FFFF", + "iilh %r8, 65535", + )); + insns.push(( + Inst::Insert64UImm16Shifted { + rd: writable_gpr(8), + imm: UImm16Shifted::maybe_from_u64(0x0000_ffff_0000_0000).unwrap(), + }, + "A581FFFF", + "iihl %r8, 65535", + )); + insns.push(( + Inst::Insert64UImm16Shifted { + rd: writable_gpr(8), + imm: UImm16Shifted::maybe_from_u64(0xffff_0000_0000_0000).unwrap(), + }, + "A580FFFF", + "iihh %r8, 65535", + )); + insns.push(( + Inst::Insert64UImm32Shifted { + rd: writable_gpr(8), + imm: UImm32Shifted::maybe_from_u64(0x0000_0000_ffff_ffff).unwrap(), + }, + "C089FFFFFFFF", + "iilf %r8, 4294967295", + )); + insns.push(( + Inst::Insert64UImm32Shifted { + rd: writable_gpr(8), + imm: UImm32Shifted::maybe_from_u64(0xffff_ffff_0000_0000).unwrap(), + }, + "C088FFFFFFFF", + "iihf %r8, 4294967295", + )); + + insns.push(( + Inst::CMov32 { + rd: writable_gpr(8), + cond: Cond::from_mask(1), + rm: gpr(9), + }, + "B9F21089", + "locro %r8, %r9", + )); + insns.push(( + Inst::CMov64 { + rd: writable_gpr(8), + cond: Cond::from_mask(1), + rm: gpr(9), + }, + "B9E21089", + "locgro %r8, %r9", + )); + + insns.push(( + Inst::CMov32SImm16 { + rd: writable_gpr(8), + cond: Cond::from_mask(1), + imm: -32768, + }, + "EC8180000042", + "lochio %r8, -32768", + )); + insns.push(( + Inst::CMov32SImm16 { + rd: writable_gpr(8), + cond: Cond::from_mask(1), + imm: 32767, + }, + "EC817FFF0042", + "lochio %r8, 32767", + )); + insns.push(( + Inst::CMov64SImm16 { + rd: writable_gpr(8), + cond: Cond::from_mask(1), + imm: -32768, + }, + "EC8180000046", + "locghio %r8, -32768", + )); + insns.push(( + Inst::CMov64SImm16 { + rd: writable_gpr(8), + cond: Cond::from_mask(1), + imm: 32767, + }, + "EC817FFF0046", + "locghio %r8, 32767", + )); + + insns.push(( + Inst::Extend { + rd: writable_gpr(1), + rn: gpr(2), + signed: false, + 
from_bits: 8, + to_bits: 32, + }, + "B9940012", + "llcr %r1, %r2", + )); + insns.push(( + Inst::Extend { + rd: writable_gpr(1), + rn: gpr(2), + signed: true, + from_bits: 8, + to_bits: 32, + }, + "B9260012", + "lbr %r1, %r2", + )); + insns.push(( + Inst::Extend { + rd: writable_gpr(1), + rn: gpr(2), + signed: false, + from_bits: 16, + to_bits: 32, + }, + "B9950012", + "llhr %r1, %r2", + )); + insns.push(( + Inst::Extend { + rd: writable_gpr(1), + rn: gpr(2), + signed: true, + from_bits: 16, + to_bits: 32, + }, + "B9270012", + "lhr %r1, %r2", + )); + insns.push(( + Inst::Extend { + rd: writable_gpr(1), + rn: gpr(2), + signed: false, + from_bits: 8, + to_bits: 64, + }, + "B9840012", + "llgcr %r1, %r2", + )); + insns.push(( + Inst::Extend { + rd: writable_gpr(1), + rn: gpr(2), + signed: true, + from_bits: 8, + to_bits: 64, + }, + "B9060012", + "lgbr %r1, %r2", + )); + insns.push(( + Inst::Extend { + rd: writable_gpr(1), + rn: gpr(2), + signed: false, + from_bits: 16, + to_bits: 64, + }, + "B9850012", + "llghr %r1, %r2", + )); + insns.push(( + Inst::Extend { + rd: writable_gpr(1), + rn: gpr(2), + signed: true, + from_bits: 16, + to_bits: 64, + }, + "B9070012", + "lghr %r1, %r2", + )); + insns.push(( + Inst::Extend { + rd: writable_gpr(1), + rn: gpr(2), + signed: false, + from_bits: 32, + to_bits: 64, + }, + "B9160012", + "llgfr %r1, %r2", + )); + insns.push(( + Inst::Extend { + rd: writable_gpr(1), + rn: gpr(2), + signed: true, + from_bits: 32, + to_bits: 64, + }, + "B9140012", + "lgfr %r1, %r2", + )); + + insns.push(( + Inst::Jump { + dest: BranchTarget::ResolvedOffset(64), + }, + "C0F400000020", + "jg 64", + )); + + insns.push(( + Inst::OneWayCondBr { + target: BranchTarget::ResolvedOffset(64), + cond: Cond::from_mask(1), + }, + "C01400000020", + "jgo 64", + )); + insns.push(( + Inst::OneWayCondBr { + target: BranchTarget::ResolvedOffset(64), + cond: Cond::from_mask(2), + }, + "C02400000020", + "jgh 64", + )); + insns.push(( + Inst::OneWayCondBr { + target: BranchTarget::ResolvedOffset(64), + cond: Cond::from_mask(3), + }, + "C03400000020", + "jgnle 64", + )); + insns.push(( + Inst::OneWayCondBr { + target: BranchTarget::ResolvedOffset(64), + cond: Cond::from_mask(4), + }, + "C04400000020", + "jgl 64", + )); + insns.push(( + Inst::OneWayCondBr { + target: BranchTarget::ResolvedOffset(64), + cond: Cond::from_mask(5), + }, + "C05400000020", + "jgnhe 64", + )); + insns.push(( + Inst::OneWayCondBr { + target: BranchTarget::ResolvedOffset(64), + cond: Cond::from_mask(6), + }, + "C06400000020", + "jglh 64", + )); + insns.push(( + Inst::OneWayCondBr { + target: BranchTarget::ResolvedOffset(64), + cond: Cond::from_mask(7), + }, + "C07400000020", + "jgne 64", + )); + insns.push(( + Inst::OneWayCondBr { + target: BranchTarget::ResolvedOffset(64), + cond: Cond::from_mask(8), + }, + "C08400000020", + "jge 64", + )); + insns.push(( + Inst::OneWayCondBr { + target: BranchTarget::ResolvedOffset(64), + cond: Cond::from_mask(9), + }, + "C09400000020", + "jgnlh 64", + )); + insns.push(( + Inst::OneWayCondBr { + target: BranchTarget::ResolvedOffset(64), + cond: Cond::from_mask(10), + }, + "C0A400000020", + "jghe 64", + )); + insns.push(( + Inst::OneWayCondBr { + target: BranchTarget::ResolvedOffset(64), + cond: Cond::from_mask(11), + }, + "C0B400000020", + "jgnl 64", + )); + insns.push(( + Inst::OneWayCondBr { + target: BranchTarget::ResolvedOffset(64), + cond: Cond::from_mask(12), + }, + "C0C400000020", + "jgle 64", + )); + insns.push(( + Inst::OneWayCondBr { + target: BranchTarget::ResolvedOffset(64), + 
cond: Cond::from_mask(13), + }, + "C0D400000020", + "jgnh 64", + )); + insns.push(( + Inst::OneWayCondBr { + target: BranchTarget::ResolvedOffset(64), + cond: Cond::from_mask(14), + }, + "C0E400000020", + "jgno 64", + )); + + insns.push(( + Inst::CondBr { + taken: BranchTarget::ResolvedOffset(64), + not_taken: BranchTarget::ResolvedOffset(128), + cond: Cond::from_mask(1), + }, + "C01400000020C0F400000040", + "jgo 64 ; jg 128", + )); + insns.push(( + Inst::CondBr { + taken: BranchTarget::ResolvedOffset(64), + not_taken: BranchTarget::ResolvedOffset(128), + cond: Cond::from_mask(2), + }, + "C02400000020C0F400000040", + "jgh 64 ; jg 128", + )); + insns.push(( + Inst::CondBr { + taken: BranchTarget::ResolvedOffset(64), + not_taken: BranchTarget::ResolvedOffset(128), + cond: Cond::from_mask(3), + }, + "C03400000020C0F400000040", + "jgnle 64 ; jg 128", + )); + insns.push(( + Inst::CondBr { + taken: BranchTarget::ResolvedOffset(64), + not_taken: BranchTarget::ResolvedOffset(128), + cond: Cond::from_mask(4), + }, + "C04400000020C0F400000040", + "jgl 64 ; jg 128", + )); + insns.push(( + Inst::CondBr { + taken: BranchTarget::ResolvedOffset(64), + not_taken: BranchTarget::ResolvedOffset(128), + cond: Cond::from_mask(5), + }, + "C05400000020C0F400000040", + "jgnhe 64 ; jg 128", + )); + insns.push(( + Inst::CondBr { + taken: BranchTarget::ResolvedOffset(64), + not_taken: BranchTarget::ResolvedOffset(128), + cond: Cond::from_mask(6), + }, + "C06400000020C0F400000040", + "jglh 64 ; jg 128", + )); + insns.push(( + Inst::CondBr { + taken: BranchTarget::ResolvedOffset(64), + not_taken: BranchTarget::ResolvedOffset(128), + cond: Cond::from_mask(7), + }, + "C07400000020C0F400000040", + "jgne 64 ; jg 128", + )); + insns.push(( + Inst::CondBr { + taken: BranchTarget::ResolvedOffset(64), + not_taken: BranchTarget::ResolvedOffset(128), + cond: Cond::from_mask(8), + }, + "C08400000020C0F400000040", + "jge 64 ; jg 128", + )); + insns.push(( + Inst::CondBr { + taken: BranchTarget::ResolvedOffset(64), + not_taken: BranchTarget::ResolvedOffset(128), + cond: Cond::from_mask(9), + }, + "C09400000020C0F400000040", + "jgnlh 64 ; jg 128", + )); + insns.push(( + Inst::CondBr { + taken: BranchTarget::ResolvedOffset(64), + not_taken: BranchTarget::ResolvedOffset(128), + cond: Cond::from_mask(10), + }, + "C0A400000020C0F400000040", + "jghe 64 ; jg 128", + )); + insns.push(( + Inst::CondBr { + taken: BranchTarget::ResolvedOffset(64), + not_taken: BranchTarget::ResolvedOffset(128), + cond: Cond::from_mask(11), + }, + "C0B400000020C0F400000040", + "jgnl 64 ; jg 128", + )); + insns.push(( + Inst::CondBr { + taken: BranchTarget::ResolvedOffset(64), + not_taken: BranchTarget::ResolvedOffset(128), + cond: Cond::from_mask(12), + }, + "C0C400000020C0F400000040", + "jgle 64 ; jg 128", + )); + insns.push(( + Inst::CondBr { + taken: BranchTarget::ResolvedOffset(64), + not_taken: BranchTarget::ResolvedOffset(128), + cond: Cond::from_mask(13), + }, + "C0D400000020C0F400000040", + "jgnh 64 ; jg 128", + )); + insns.push(( + Inst::CondBr { + taken: BranchTarget::ResolvedOffset(64), + not_taken: BranchTarget::ResolvedOffset(128), + cond: Cond::from_mask(14), + }, + "C0E400000020C0F400000040", + "jgno 64 ; jg 128", + )); + + insns.push(( + Inst::IndirectBr { + rn: gpr(3), + targets: vec![], + }, + "07F3", + "br %r3", + )); + + insns.push(( + Inst::Call { + link: writable_gpr(14), + info: Box::new(CallInfo { + dest: ExternalName::testcase("test0"), + uses: Vec::new(), + defs: Vec::new(), + opcode: Opcode::Call, + }), + }, + "C0E500000000", + 
"brasl %r14, %test0", + )); + + insns.push(( + Inst::CallInd { + link: writable_gpr(14), + info: Box::new(CallIndInfo { + rn: gpr(1), + uses: Vec::new(), + defs: Vec::new(), + opcode: Opcode::CallIndirect, + }), + }, + "0DE1", + "basr %r14, %r1", + )); + + insns.push((Inst::Ret { link: gpr(14) }, "07FE", "br %r14")); + + insns.push((Inst::Debugtrap, "0001", "debugtrap")); + + insns.push(( + Inst::Trap { + trap_code: TrapCode::StackOverflow, + }, + "0000", + "trap", + )); + insns.push(( + Inst::TrapIf { + cond: Cond::from_mask(1), + trap_code: TrapCode::StackOverflow, + }, + "A7E400030000", + "jno 6 ; trap", + )); + + insns.push(( + Inst::FpuMove32 { + rd: writable_fpr(8), + rn: fpr(4), + }, + "3884", + "ler %f8, %f4", + )); + insns.push(( + Inst::FpuMove64 { + rd: writable_fpr(8), + rn: fpr(4), + }, + "2884", + "ldr %f8, %f4", + )); + insns.push(( + Inst::FpuCMov32 { + rd: writable_fpr(8), + rm: fpr(4), + cond: Cond::from_mask(1), + }, + "A7E400033884", + "jno 6 ; ler %f8, %f4", + )); + insns.push(( + Inst::FpuCMov64 { + rd: writable_fpr(8), + rm: fpr(4), + cond: Cond::from_mask(1), + }, + "A7E400032884", + "jno 6 ; ldr %f8, %f4", + )); + + insns.push(( + Inst::MovToFpr { + rd: writable_fpr(8), + rn: gpr(4), + }, + "B3C10084", + "ldgr %f8, %r4", + )); + insns.push(( + Inst::MovFromFpr { + rd: writable_gpr(8), + rn: fpr(4), + }, + "B3CD0084", + "lgdr %r8, %f4", + )); + + insns.push(( + Inst::FpuRR { + fpu_op: FPUOp1::Abs32, + rd: writable_fpr(8), + rn: fpr(12), + }, + "B300008C", + "lpebr %f8, %f12", + )); + insns.push(( + Inst::FpuRR { + fpu_op: FPUOp1::Abs64, + rd: writable_fpr(8), + rn: fpr(12), + }, + "B310008C", + "lpdbr %f8, %f12", + )); + insns.push(( + Inst::FpuRR { + fpu_op: FPUOp1::Neg32, + rd: writable_fpr(8), + rn: fpr(12), + }, + "B303008C", + "lcebr %f8, %f12", + )); + insns.push(( + Inst::FpuRR { + fpu_op: FPUOp1::Neg64, + rd: writable_fpr(8), + rn: fpr(12), + }, + "B313008C", + "lcdbr %f8, %f12", + )); + insns.push(( + Inst::FpuRR { + fpu_op: FPUOp1::NegAbs32, + rd: writable_fpr(8), + rn: fpr(12), + }, + "B301008C", + "lnebr %f8, %f12", + )); + insns.push(( + Inst::FpuRR { + fpu_op: FPUOp1::NegAbs64, + rd: writable_fpr(8), + rn: fpr(12), + }, + "B311008C", + "lndbr %f8, %f12", + )); + insns.push(( + Inst::FpuRR { + fpu_op: FPUOp1::Sqrt32, + rd: writable_fpr(8), + rn: fpr(12), + }, + "B314008C", + "sqebr %f8, %f12", + )); + insns.push(( + Inst::FpuRR { + fpu_op: FPUOp1::Sqrt64, + rd: writable_fpr(8), + rn: fpr(12), + }, + "B315008C", + "sqdbr %f8, %f12", + )); + insns.push(( + Inst::FpuRR { + fpu_op: FPUOp1::Cvt32To64, + rd: writable_fpr(8), + rn: fpr(12), + }, + "B304008C", + "ldebr %f8, %f12", + )); + insns.push(( + Inst::FpuRR { + fpu_op: FPUOp1::Cvt64To32, + rd: writable_fpr(8), + rn: fpr(12), + }, + "B344008C", + "ledbr %f8, %f12", + )); + + insns.push(( + Inst::FpuRRR { + fpu_op: FPUOp2::Add32, + rd: writable_fpr(8), + rm: fpr(12), + }, + "B30A008C", + "aebr %f8, %f12", + )); + insns.push(( + Inst::FpuRRR { + fpu_op: FPUOp2::Add64, + rd: writable_fpr(8), + rm: fpr(12), + }, + "B31A008C", + "adbr %f8, %f12", + )); + insns.push(( + Inst::FpuRRR { + fpu_op: FPUOp2::Sub32, + rd: writable_fpr(8), + rm: fpr(12), + }, + "B30B008C", + "sebr %f8, %f12", + )); + insns.push(( + Inst::FpuRRR { + fpu_op: FPUOp2::Sub64, + rd: writable_fpr(8), + rm: fpr(12), + }, + "B31B008C", + "sdbr %f8, %f12", + )); + insns.push(( + Inst::FpuRRR { + fpu_op: FPUOp2::Mul32, + rd: writable_fpr(8), + rm: fpr(12), + }, + "B317008C", + "meebr %f8, %f12", + )); + insns.push(( + Inst::FpuRRR { + fpu_op: 
FPUOp2::Mul64, + rd: writable_fpr(8), + rm: fpr(12), + }, + "B31C008C", + "mdbr %f8, %f12", + )); + insns.push(( + Inst::FpuRRR { + fpu_op: FPUOp2::Div32, + rd: writable_fpr(8), + rm: fpr(12), + }, + "B30D008C", + "debr %f8, %f12", + )); + insns.push(( + Inst::FpuRRR { + fpu_op: FPUOp2::Div64, + rd: writable_fpr(8), + rm: fpr(12), + }, + "B31D008C", + "ddbr %f8, %f12", + )); + + insns.push(( + Inst::FpuRRRR { + fpu_op: FPUOp3::MAdd32, + rd: writable_fpr(8), + rn: fpr(12), + rm: fpr(13), + }, + "B30E80CD", + "maebr %f8, %f12, %f13", + )); + insns.push(( + Inst::FpuRRRR { + fpu_op: FPUOp3::MAdd64, + rd: writable_fpr(8), + rn: fpr(12), + rm: fpr(13), + }, + "B31E80CD", + "madbr %f8, %f12, %f13", + )); + insns.push(( + Inst::FpuRRRR { + fpu_op: FPUOp3::MSub32, + rd: writable_fpr(8), + rn: fpr(12), + rm: fpr(13), + }, + "B30F80CD", + "msebr %f8, %f12, %f13", + )); + insns.push(( + Inst::FpuRRRR { + fpu_op: FPUOp3::MSub64, + rd: writable_fpr(8), + rn: fpr(12), + rm: fpr(13), + }, + "B31F80CD", + "msdbr %f8, %f12, %f13", + )); + + insns.push(( + Inst::FpuToInt { + op: FpuToIntOp::F32ToU32, + rd: writable_gpr(1), + rn: fpr(4), + }, + "B39C5014", + "clfebr %r1, 5, %f4, 0", + )); + + insns.push(( + Inst::FpuToInt { + op: FpuToIntOp::F32ToU64, + rd: writable_gpr(1), + rn: fpr(4), + }, + "B3AC5014", + "clgebr %r1, 5, %f4, 0", + )); + + insns.push(( + Inst::FpuToInt { + op: FpuToIntOp::F32ToI32, + rd: writable_gpr(1), + rn: fpr(4), + }, + "B3985014", + "cfebra %r1, 5, %f4, 0", + )); + + insns.push(( + Inst::FpuToInt { + op: FpuToIntOp::F32ToI64, + rd: writable_gpr(1), + rn: fpr(4), + }, + "B3A85014", + "cgebra %r1, 5, %f4, 0", + )); + + insns.push(( + Inst::FpuToInt { + op: FpuToIntOp::F64ToU32, + rd: writable_gpr(1), + rn: fpr(4), + }, + "B39D5014", + "clfdbr %r1, 5, %f4, 0", + )); + + insns.push(( + Inst::FpuToInt { + op: FpuToIntOp::F64ToU64, + rd: writable_gpr(1), + rn: fpr(4), + }, + "B3AD5014", + "clgdbr %r1, 5, %f4, 0", + )); + + insns.push(( + Inst::FpuToInt { + op: FpuToIntOp::F64ToI32, + rd: writable_gpr(1), + rn: fpr(4), + }, + "B3995014", + "cfdbra %r1, 5, %f4, 0", + )); + + insns.push(( + Inst::FpuToInt { + op: FpuToIntOp::F64ToI64, + rd: writable_gpr(1), + rn: fpr(4), + }, + "B3A95014", + "cgdbra %r1, 5, %f4, 0", + )); + + insns.push(( + Inst::IntToFpu { + op: IntToFpuOp::U32ToF32, + rd: writable_fpr(1), + rn: gpr(4), + }, + "B3900014", + "celfbr %f1, 0, %r4, 0", + )); + + insns.push(( + Inst::IntToFpu { + op: IntToFpuOp::I32ToF32, + rd: writable_fpr(1), + rn: gpr(4), + }, + "B3940014", + "cefbra %f1, 0, %r4, 0", + )); + + insns.push(( + Inst::IntToFpu { + op: IntToFpuOp::U32ToF64, + rd: writable_fpr(1), + rn: gpr(4), + }, + "B3910014", + "cdlfbr %f1, 0, %r4, 0", + )); + + insns.push(( + Inst::IntToFpu { + op: IntToFpuOp::I32ToF64, + rd: writable_fpr(1), + rn: gpr(4), + }, + "B3950014", + "cdfbra %f1, 0, %r4, 0", + )); + + insns.push(( + Inst::IntToFpu { + op: IntToFpuOp::U64ToF32, + rd: writable_fpr(1), + rn: gpr(4), + }, + "B3A00014", + "celgbr %f1, 0, %r4, 0", + )); + + insns.push(( + Inst::IntToFpu { + op: IntToFpuOp::I64ToF32, + rd: writable_fpr(1), + rn: gpr(4), + }, + "B3A40014", + "cegbra %f1, 0, %r4, 0", + )); + + insns.push(( + Inst::IntToFpu { + op: IntToFpuOp::U64ToF64, + rd: writable_fpr(1), + rn: gpr(4), + }, + "B3A10014", + "cdlgbr %f1, 0, %r4, 0", + )); + + insns.push(( + Inst::IntToFpu { + op: IntToFpuOp::I64ToF64, + rd: writable_fpr(1), + rn: gpr(4), + }, + "B3A50014", + "cdgbra %f1, 0, %r4, 0", + )); + + insns.push(( + Inst::FpuCopysign { + rd: writable_fpr(4), + rn: 
fpr(8), + rm: fpr(12), + }, + "B372C048", + "cpsdr %f4, %f12, %f8", + )); + + insns.push(( + Inst::FpuCmp32 { + rn: fpr(8), + rm: fpr(12), + }, + "B309008C", + "cebr %f8, %f12", + )); + insns.push(( + Inst::FpuCmp64 { + rn: fpr(8), + rm: fpr(12), + }, + "B319008C", + "cdbr %f8, %f12", + )); + + insns.push(( + Inst::FpuLoad32 { + rd: writable_fpr(1), + mem: MemArg::BXD12 { + base: gpr(2), + index: zero_reg(), + disp: UImm12::zero(), + flags: MemFlags::trusted(), + }, + }, + "78102000", + "le %f1, 0(%r2)", + )); + insns.push(( + Inst::FpuLoad32 { + rd: writable_fpr(1), + mem: MemArg::BXD12 { + base: gpr(2), + index: zero_reg(), + disp: UImm12::maybe_from_u64(4095).unwrap(), + flags: MemFlags::trusted(), + }, + }, + "78102FFF", + "le %f1, 4095(%r2)", + )); + insns.push(( + Inst::FpuLoad32 { + rd: writable_fpr(1), + mem: MemArg::BXD20 { + base: gpr(2), + index: zero_reg(), + disp: SImm20::maybe_from_i64(-524288).unwrap(), + flags: MemFlags::trusted(), + }, + }, + "ED1020008064", + "ley %f1, -524288(%r2)", + )); + insns.push(( + Inst::FpuLoad32 { + rd: writable_fpr(1), + mem: MemArg::BXD20 { + base: gpr(2), + index: zero_reg(), + disp: SImm20::maybe_from_i64(524287).unwrap(), + flags: MemFlags::trusted(), + }, + }, + "ED102FFF7F64", + "ley %f1, 524287(%r2)", + )); + insns.push(( + Inst::FpuLoad32 { + rd: writable_fpr(1), + mem: MemArg::BXD12 { + base: gpr(3), + index: gpr(2), + disp: UImm12::zero(), + flags: MemFlags::trusted(), + }, + }, + "78123000", + "le %f1, 0(%r2,%r3)", + )); + insns.push(( + Inst::FpuLoad32 { + rd: writable_fpr(1), + mem: MemArg::BXD12 { + base: gpr(3), + index: gpr(2), + disp: UImm12::maybe_from_u64(4095).unwrap(), + flags: MemFlags::trusted(), + }, + }, + "78123FFF", + "le %f1, 4095(%r2,%r3)", + )); + insns.push(( + Inst::FpuLoad32 { + rd: writable_fpr(1), + mem: MemArg::BXD20 { + base: gpr(3), + index: gpr(2), + disp: SImm20::maybe_from_i64(-524288).unwrap(), + flags: MemFlags::trusted(), + }, + }, + "ED1230008064", + "ley %f1, -524288(%r2,%r3)", + )); + insns.push(( + Inst::FpuLoad32 { + rd: writable_fpr(1), + mem: MemArg::BXD20 { + base: gpr(3), + index: gpr(2), + disp: SImm20::maybe_from_i64(524287).unwrap(), + flags: MemFlags::trusted(), + }, + }, + "ED123FFF7F64", + "ley %f1, 524287(%r2,%r3)", + )); + insns.push(( + Inst::FpuLoad64 { + rd: writable_fpr(1), + mem: MemArg::BXD12 { + base: gpr(2), + index: zero_reg(), + disp: UImm12::zero(), + flags: MemFlags::trusted(), + }, + }, + "68102000", + "ld %f1, 0(%r2)", + )); + insns.push(( + Inst::FpuLoad64 { + rd: writable_fpr(1), + mem: MemArg::BXD12 { + base: gpr(2), + index: zero_reg(), + disp: UImm12::maybe_from_u64(4095).unwrap(), + flags: MemFlags::trusted(), + }, + }, + "68102FFF", + "ld %f1, 4095(%r2)", + )); + insns.push(( + Inst::FpuLoad64 { + rd: writable_fpr(1), + mem: MemArg::BXD20 { + base: gpr(2), + index: zero_reg(), + disp: SImm20::maybe_from_i64(-524288).unwrap(), + flags: MemFlags::trusted(), + }, + }, + "ED1020008065", + "ldy %f1, -524288(%r2)", + )); + insns.push(( + Inst::FpuLoad64 { + rd: writable_fpr(1), + mem: MemArg::BXD20 { + base: gpr(2), + index: zero_reg(), + disp: SImm20::maybe_from_i64(524287).unwrap(), + flags: MemFlags::trusted(), + }, + }, + "ED102FFF7F65", + "ldy %f1, 524287(%r2)", + )); + insns.push(( + Inst::FpuLoad64 { + rd: writable_fpr(1), + mem: MemArg::BXD12 { + base: gpr(3), + index: gpr(2), + disp: UImm12::zero(), + flags: MemFlags::trusted(), + }, + }, + "68123000", + "ld %f1, 0(%r2,%r3)", + )); + insns.push(( + Inst::FpuLoad64 { + rd: writable_fpr(1), + mem: MemArg::BXD12 { 
+ base: gpr(3), + index: gpr(2), + disp: UImm12::maybe_from_u64(4095).unwrap(), + flags: MemFlags::trusted(), + }, + }, + "68123FFF", + "ld %f1, 4095(%r2,%r3)", + )); + insns.push(( + Inst::FpuLoad64 { + rd: writable_fpr(1), + mem: MemArg::BXD20 { + base: gpr(3), + index: gpr(2), + disp: SImm20::maybe_from_i64(-524288).unwrap(), + flags: MemFlags::trusted(), + }, + }, + "ED1230008065", + "ldy %f1, -524288(%r2,%r3)", + )); + insns.push(( + Inst::FpuLoad64 { + rd: writable_fpr(1), + mem: MemArg::BXD20 { + base: gpr(3), + index: gpr(2), + disp: SImm20::maybe_from_i64(524287).unwrap(), + flags: MemFlags::trusted(), + }, + }, + "ED123FFF7F65", + "ldy %f1, 524287(%r2,%r3)", + )); + insns.push(( + Inst::FpuStore32 { + rd: fpr(1), + mem: MemArg::BXD12 { + base: gpr(2), + index: zero_reg(), + disp: UImm12::zero(), + flags: MemFlags::trusted(), + }, + }, + "70102000", + "ste %f1, 0(%r2)", + )); + insns.push(( + Inst::FpuStore32 { + rd: fpr(1), + mem: MemArg::BXD12 { + base: gpr(2), + index: zero_reg(), + disp: UImm12::maybe_from_u64(4095).unwrap(), + flags: MemFlags::trusted(), + }, + }, + "70102FFF", + "ste %f1, 4095(%r2)", + )); + insns.push(( + Inst::FpuStore32 { + rd: fpr(1), + mem: MemArg::BXD20 { + base: gpr(2), + index: zero_reg(), + disp: SImm20::maybe_from_i64(-524288).unwrap(), + flags: MemFlags::trusted(), + }, + }, + "ED1020008066", + "stey %f1, -524288(%r2)", + )); + insns.push(( + Inst::FpuStore32 { + rd: fpr(1), + mem: MemArg::BXD20 { + base: gpr(2), + index: zero_reg(), + disp: SImm20::maybe_from_i64(524287).unwrap(), + flags: MemFlags::trusted(), + }, + }, + "ED102FFF7F66", + "stey %f1, 524287(%r2)", + )); + insns.push(( + Inst::FpuStore32 { + rd: fpr(1), + mem: MemArg::BXD12 { + base: gpr(3), + index: gpr(2), + disp: UImm12::zero(), + flags: MemFlags::trusted(), + }, + }, + "70123000", + "ste %f1, 0(%r2,%r3)", + )); + insns.push(( + Inst::FpuStore32 { + rd: fpr(1), + mem: MemArg::BXD12 { + base: gpr(3), + index: gpr(2), + disp: UImm12::maybe_from_u64(4095).unwrap(), + flags: MemFlags::trusted(), + }, + }, + "70123FFF", + "ste %f1, 4095(%r2,%r3)", + )); + insns.push(( + Inst::FpuStore32 { + rd: fpr(1), + mem: MemArg::BXD20 { + base: gpr(3), + index: gpr(2), + disp: SImm20::maybe_from_i64(-524288).unwrap(), + flags: MemFlags::trusted(), + }, + }, + "ED1230008066", + "stey %f1, -524288(%r2,%r3)", + )); + insns.push(( + Inst::FpuStore32 { + rd: fpr(1), + mem: MemArg::BXD20 { + base: gpr(3), + index: gpr(2), + disp: SImm20::maybe_from_i64(524287).unwrap(), + flags: MemFlags::trusted(), + }, + }, + "ED123FFF7F66", + "stey %f1, 524287(%r2,%r3)", + )); + insns.push(( + Inst::FpuStore64 { + rd: fpr(1), + mem: MemArg::BXD12 { + base: gpr(2), + index: zero_reg(), + disp: UImm12::zero(), + flags: MemFlags::trusted(), + }, + }, + "60102000", + "std %f1, 0(%r2)", + )); + insns.push(( + Inst::FpuStore64 { + rd: fpr(1), + mem: MemArg::BXD12 { + base: gpr(2), + index: zero_reg(), + disp: UImm12::maybe_from_u64(4095).unwrap(), + flags: MemFlags::trusted(), + }, + }, + "60102FFF", + "std %f1, 4095(%r2)", + )); + insns.push(( + Inst::FpuStore64 { + rd: fpr(1), + mem: MemArg::BXD20 { + base: gpr(2), + index: zero_reg(), + disp: SImm20::maybe_from_i64(-524288).unwrap(), + flags: MemFlags::trusted(), + }, + }, + "ED1020008067", + "stdy %f1, -524288(%r2)", + )); + insns.push(( + Inst::FpuStore64 { + rd: fpr(1), + mem: MemArg::BXD20 { + base: gpr(2), + index: zero_reg(), + disp: SImm20::maybe_from_i64(524287).unwrap(), + flags: MemFlags::trusted(), + }, + }, + "ED102FFF7F67", + "stdy %f1, 524287(%r2)", + 
)); + insns.push(( + Inst::FpuStore64 { + rd: fpr(1), + mem: MemArg::BXD12 { + base: gpr(3), + index: gpr(2), + disp: UImm12::zero(), + flags: MemFlags::trusted(), + }, + }, + "60123000", + "std %f1, 0(%r2,%r3)", + )); + insns.push(( + Inst::FpuStore64 { + rd: fpr(1), + mem: MemArg::BXD12 { + base: gpr(3), + index: gpr(2), + disp: UImm12::maybe_from_u64(4095).unwrap(), + flags: MemFlags::trusted(), + }, + }, + "60123FFF", + "std %f1, 4095(%r2,%r3)", + )); + insns.push(( + Inst::FpuStore64 { + rd: fpr(1), + mem: MemArg::BXD20 { + base: gpr(3), + index: gpr(2), + disp: SImm20::maybe_from_i64(-524288).unwrap(), + flags: MemFlags::trusted(), + }, + }, + "ED1230008067", + "stdy %f1, -524288(%r2,%r3)", + )); + insns.push(( + Inst::FpuStore64 { + rd: fpr(1), + mem: MemArg::BXD20 { + base: gpr(3), + index: gpr(2), + disp: SImm20::maybe_from_i64(524287).unwrap(), + flags: MemFlags::trusted(), + }, + }, + "ED123FFF7F67", + "stdy %f1, 524287(%r2,%r3)", + )); + + insns.push(( + Inst::FpuLoadRev32 { + rd: writable_fpr(1), + mem: MemArg::BXD12 { + base: gpr(2), + index: zero_reg(), + disp: UImm12::zero(), + flags: MemFlags::trusted(), + }, + }, + "E61020000003", + "vlebrf %f1, 0(%r2), 0", + )); + insns.push(( + Inst::FpuLoadRev32 { + rd: writable_fpr(1), + mem: MemArg::BXD12 { + base: gpr(2), + index: zero_reg(), + disp: UImm12::maybe_from_u64(4095).unwrap(), + flags: MemFlags::trusted(), + }, + }, + "E6102FFF0003", + "vlebrf %f1, 4095(%r2), 0", + )); + insns.push(( + Inst::FpuLoadRev32 { + rd: writable_fpr(1), + mem: MemArg::BXD20 { + base: gpr(2), + index: zero_reg(), + disp: SImm20::maybe_from_i64(-524288).unwrap(), + flags: MemFlags::trusted(), + }, + }, + "E31020008071E61010000003", + "lay %r1, -524288(%r2) ; vlebrf %f1, 0(%r1), 0", + )); + insns.push(( + Inst::FpuLoadRev32 { + rd: writable_fpr(1), + mem: MemArg::BXD20 { + base: gpr(2), + index: zero_reg(), + disp: SImm20::maybe_from_i64(524287).unwrap(), + flags: MemFlags::trusted(), + }, + }, + "E3102FFF7F71E61010000003", + "lay %r1, 524287(%r2) ; vlebrf %f1, 0(%r1), 0", + )); + insns.push(( + Inst::FpuLoadRev32 { + rd: writable_fpr(1), + mem: MemArg::BXD12 { + base: gpr(3), + index: gpr(2), + disp: UImm12::zero(), + flags: MemFlags::trusted(), + }, + }, + "E61230000003", + "vlebrf %f1, 0(%r2,%r3), 0", + )); + insns.push(( + Inst::FpuLoadRev32 { + rd: writable_fpr(1), + mem: MemArg::BXD12 { + base: gpr(3), + index: gpr(2), + disp: UImm12::maybe_from_u64(4095).unwrap(), + flags: MemFlags::trusted(), + }, + }, + "E6123FFF0003", + "vlebrf %f1, 4095(%r2,%r3), 0", + )); + insns.push(( + Inst::FpuLoadRev32 { + rd: writable_fpr(1), + mem: MemArg::BXD20 { + base: gpr(3), + index: gpr(2), + disp: SImm20::maybe_from_i64(-524288).unwrap(), + flags: MemFlags::trusted(), + }, + }, + "E31230008071E61010000003", + "lay %r1, -524288(%r2,%r3) ; vlebrf %f1, 0(%r1), 0", + )); + insns.push(( + Inst::FpuLoadRev32 { + rd: writable_fpr(1), + mem: MemArg::BXD20 { + base: gpr(3), + index: gpr(2), + disp: SImm20::maybe_from_i64(524287).unwrap(), + flags: MemFlags::trusted(), + }, + }, + "E3123FFF7F71E61010000003", + "lay %r1, 524287(%r2,%r3) ; vlebrf %f1, 0(%r1), 0", + )); + insns.push(( + Inst::FpuLoadRev64 { + rd: writable_fpr(1), + mem: MemArg::BXD12 { + base: gpr(2), + index: zero_reg(), + disp: UImm12::zero(), + flags: MemFlags::trusted(), + }, + }, + "E61020000002", + "vlebrg %f1, 0(%r2), 0", + )); + insns.push(( + Inst::FpuLoadRev64 { + rd: writable_fpr(1), + mem: MemArg::BXD12 { + base: gpr(2), + index: zero_reg(), + disp: UImm12::maybe_from_u64(4095).unwrap(), + 
flags: MemFlags::trusted(), + }, + }, + "E6102FFF0002", + "vlebrg %f1, 4095(%r2), 0", + )); + insns.push(( + Inst::FpuLoadRev64 { + rd: writable_fpr(1), + mem: MemArg::BXD20 { + base: gpr(2), + index: zero_reg(), + disp: SImm20::maybe_from_i64(-524288).unwrap(), + flags: MemFlags::trusted(), + }, + }, + "E31020008071E61010000002", + "lay %r1, -524288(%r2) ; vlebrg %f1, 0(%r1), 0", + )); + insns.push(( + Inst::FpuLoadRev64 { + rd: writable_fpr(1), + mem: MemArg::BXD20 { + base: gpr(2), + index: zero_reg(), + disp: SImm20::maybe_from_i64(524287).unwrap(), + flags: MemFlags::trusted(), + }, + }, + "E3102FFF7F71E61010000002", + "lay %r1, 524287(%r2) ; vlebrg %f1, 0(%r1), 0", + )); + insns.push(( + Inst::FpuLoadRev64 { + rd: writable_fpr(1), + mem: MemArg::BXD12 { + base: gpr(3), + index: gpr(2), + disp: UImm12::zero(), + flags: MemFlags::trusted(), + }, + }, + "E61230000002", + "vlebrg %f1, 0(%r2,%r3), 0", + )); + insns.push(( + Inst::FpuLoadRev64 { + rd: writable_fpr(1), + mem: MemArg::BXD12 { + base: gpr(3), + index: gpr(2), + disp: UImm12::maybe_from_u64(4095).unwrap(), + flags: MemFlags::trusted(), + }, + }, + "E6123FFF0002", + "vlebrg %f1, 4095(%r2,%r3), 0", + )); + insns.push(( + Inst::FpuLoadRev64 { + rd: writable_fpr(1), + mem: MemArg::BXD20 { + base: gpr(3), + index: gpr(2), + disp: SImm20::maybe_from_i64(-524288).unwrap(), + flags: MemFlags::trusted(), + }, + }, + "E31230008071E61010000002", + "lay %r1, -524288(%r2,%r3) ; vlebrg %f1, 0(%r1), 0", + )); + insns.push(( + Inst::FpuLoadRev64 { + rd: writable_fpr(1), + mem: MemArg::BXD20 { + base: gpr(3), + index: gpr(2), + disp: SImm20::maybe_from_i64(524287).unwrap(), + flags: MemFlags::trusted(), + }, + }, + "E3123FFF7F71E61010000002", + "lay %r1, 524287(%r2,%r3) ; vlebrg %f1, 0(%r1), 0", + )); + insns.push(( + Inst::FpuStoreRev32 { + rd: fpr(1), + mem: MemArg::BXD12 { + base: gpr(2), + index: zero_reg(), + disp: UImm12::zero(), + flags: MemFlags::trusted(), + }, + }, + "E6102000000B", + "vstebrf %f1, 0(%r2), 0", + )); + insns.push(( + Inst::FpuStoreRev32 { + rd: fpr(1), + mem: MemArg::BXD12 { + base: gpr(2), + index: zero_reg(), + disp: UImm12::maybe_from_u64(4095).unwrap(), + flags: MemFlags::trusted(), + }, + }, + "E6102FFF000B", + "vstebrf %f1, 4095(%r2), 0", + )); + insns.push(( + Inst::FpuStoreRev32 { + rd: fpr(1), + mem: MemArg::BXD20 { + base: gpr(2), + index: zero_reg(), + disp: SImm20::maybe_from_i64(-524288).unwrap(), + flags: MemFlags::trusted(), + }, + }, + "E31020008071E6101000000B", + "lay %r1, -524288(%r2) ; vstebrf %f1, 0(%r1), 0", + )); + insns.push(( + Inst::FpuStoreRev32 { + rd: fpr(1), + mem: MemArg::BXD20 { + base: gpr(2), + index: zero_reg(), + disp: SImm20::maybe_from_i64(524287).unwrap(), + flags: MemFlags::trusted(), + }, + }, + "E3102FFF7F71E6101000000B", + "lay %r1, 524287(%r2) ; vstebrf %f1, 0(%r1), 0", + )); + insns.push(( + Inst::FpuStoreRev32 { + rd: fpr(1), + mem: MemArg::BXD12 { + base: gpr(3), + index: gpr(2), + disp: UImm12::zero(), + flags: MemFlags::trusted(), + }, + }, + "E6123000000B", + "vstebrf %f1, 0(%r2,%r3), 0", + )); + insns.push(( + Inst::FpuStoreRev32 { + rd: fpr(1), + mem: MemArg::BXD12 { + base: gpr(3), + index: gpr(2), + disp: UImm12::maybe_from_u64(4095).unwrap(), + flags: MemFlags::trusted(), + }, + }, + "E6123FFF000B", + "vstebrf %f1, 4095(%r2,%r3), 0", + )); + insns.push(( + Inst::FpuStoreRev32 { + rd: fpr(1), + mem: MemArg::BXD20 { + base: gpr(3), + index: gpr(2), + disp: SImm20::maybe_from_i64(-524288).unwrap(), + flags: MemFlags::trusted(), + }, + }, + "E31230008071E6101000000B", 
+ "lay %r1, -524288(%r2,%r3) ; vstebrf %f1, 0(%r1), 0", + )); + insns.push(( + Inst::FpuStoreRev32 { + rd: fpr(1), + mem: MemArg::BXD20 { + base: gpr(3), + index: gpr(2), + disp: SImm20::maybe_from_i64(524287).unwrap(), + flags: MemFlags::trusted(), + }, + }, + "E3123FFF7F71E6101000000B", + "lay %r1, 524287(%r2,%r3) ; vstebrf %f1, 0(%r1), 0", + )); + insns.push(( + Inst::FpuStoreRev64 { + rd: fpr(1), + mem: MemArg::BXD12 { + base: gpr(2), + index: zero_reg(), + disp: UImm12::zero(), + flags: MemFlags::trusted(), + }, + }, + "E6102000000A", + "vstebrg %f1, 0(%r2), 0", + )); + insns.push(( + Inst::FpuStoreRev64 { + rd: fpr(1), + mem: MemArg::BXD12 { + base: gpr(2), + index: zero_reg(), + disp: UImm12::maybe_from_u64(4095).unwrap(), + flags: MemFlags::trusted(), + }, + }, + "E6102FFF000A", + "vstebrg %f1, 4095(%r2), 0", + )); + insns.push(( + Inst::FpuStoreRev64 { + rd: fpr(1), + mem: MemArg::BXD20 { + base: gpr(2), + index: zero_reg(), + disp: SImm20::maybe_from_i64(-524288).unwrap(), + flags: MemFlags::trusted(), + }, + }, + "E31020008071E6101000000A", + "lay %r1, -524288(%r2) ; vstebrg %f1, 0(%r1), 0", + )); + insns.push(( + Inst::FpuStoreRev64 { + rd: fpr(1), + mem: MemArg::BXD20 { + base: gpr(2), + index: zero_reg(), + disp: SImm20::maybe_from_i64(524287).unwrap(), + flags: MemFlags::trusted(), + }, + }, + "E3102FFF7F71E6101000000A", + "lay %r1, 524287(%r2) ; vstebrg %f1, 0(%r1), 0", + )); + insns.push(( + Inst::FpuStoreRev64 { + rd: fpr(1), + mem: MemArg::BXD12 { + base: gpr(3), + index: gpr(2), + disp: UImm12::zero(), + flags: MemFlags::trusted(), + }, + }, + "E6123000000A", + "vstebrg %f1, 0(%r2,%r3), 0", + )); + insns.push(( + Inst::FpuStoreRev64 { + rd: fpr(1), + mem: MemArg::BXD12 { + base: gpr(3), + index: gpr(2), + disp: UImm12::maybe_from_u64(4095).unwrap(), + flags: MemFlags::trusted(), + }, + }, + "E6123FFF000A", + "vstebrg %f1, 4095(%r2,%r3), 0", + )); + insns.push(( + Inst::FpuStoreRev64 { + rd: fpr(1), + mem: MemArg::BXD20 { + base: gpr(3), + index: gpr(2), + disp: SImm20::maybe_from_i64(-524288).unwrap(), + flags: MemFlags::trusted(), + }, + }, + "E31230008071E6101000000A", + "lay %r1, -524288(%r2,%r3) ; vstebrg %f1, 0(%r1), 0", + )); + insns.push(( + Inst::FpuStoreRev64 { + rd: fpr(1), + mem: MemArg::BXD20 { + base: gpr(3), + index: gpr(2), + disp: SImm20::maybe_from_i64(524287).unwrap(), + flags: MemFlags::trusted(), + }, + }, + "E3123FFF7F71E6101000000A", + "lay %r1, 524287(%r2,%r3) ; vstebrg %f1, 0(%r1), 0", + )); + + insns.push(( + Inst::LoadFpuConst32 { + rd: writable_fpr(8), + const_data: 1.0, + }, + "A71500043F80000078801000", + "bras %r1, 8 ; data.f32 1 ; le %f8, 0(%r1)", + )); + insns.push(( + Inst::LoadFpuConst64 { + rd: writable_fpr(8), + const_data: 1.0, + }, + "A71500063FF000000000000068801000", + "bras %r1, 12 ; data.f64 1 ; ld %f8, 0(%r1)", + )); + + insns.push(( + Inst::FpuRound { + rd: writable_fpr(8), + rn: fpr(12), + op: FpuRoundMode::Minus32, + }, + "B357708C", + "fiebr %f8, %f12, 7", + )); + insns.push(( + Inst::FpuRound { + rd: writable_fpr(8), + rn: fpr(12), + op: FpuRoundMode::Minus64, + }, + "B35F708C", + "fidbr %f8, %f12, 7", + )); + insns.push(( + Inst::FpuRound { + rd: writable_fpr(8), + rn: fpr(12), + op: FpuRoundMode::Plus32, + }, + "B357608C", + "fiebr %f8, %f12, 6", + )); + insns.push(( + Inst::FpuRound { + rd: writable_fpr(8), + rn: fpr(12), + op: FpuRoundMode::Plus64, + }, + "B35F608C", + "fidbr %f8, %f12, 6", + )); + insns.push(( + Inst::FpuRound { + rd: writable_fpr(8), + rn: fpr(12), + op: FpuRoundMode::Zero32, + }, + "B357508C", + 
"fiebr %f8, %f12, 5", + )); + insns.push(( + Inst::FpuRound { + rd: writable_fpr(8), + rn: fpr(12), + op: FpuRoundMode::Zero64, + }, + "B35F508C", + "fidbr %f8, %f12, 5", + )); + insns.push(( + Inst::FpuRound { + rd: writable_fpr(8), + rn: fpr(12), + op: FpuRoundMode::Nearest32, + }, + "B357408C", + "fiebr %f8, %f12, 4", + )); + insns.push(( + Inst::FpuRound { + rd: writable_fpr(8), + rn: fpr(12), + op: FpuRoundMode::Nearest64, + }, + "B35F408C", + "fidbr %f8, %f12, 4", + )); + + insns.push(( + Inst::FpuVecRRR { + fpu_op: FPUOp2::Max32, + rd: writable_fpr(4), + rn: fpr(6), + rm: fpr(8), + }, + "E746801820EF", + "wfmaxsb %f4, %f6, %f8, 1", + )); + insns.push(( + Inst::FpuVecRRR { + fpu_op: FPUOp2::Max64, + rd: writable_fpr(4), + rn: fpr(6), + rm: fpr(8), + }, + "E746801830EF", + "wfmaxdb %f4, %f6, %f8, 1", + )); + insns.push(( + Inst::FpuVecRRR { + fpu_op: FPUOp2::Min32, + rd: writable_fpr(4), + rn: fpr(6), + rm: fpr(8), + }, + "E746801820EE", + "wfminsb %f4, %f6, %f8, 1", + )); + insns.push(( + Inst::FpuVecRRR { + fpu_op: FPUOp2::Min64, + rd: writable_fpr(4), + rn: fpr(6), + rm: fpr(8), + }, + "E746801830EE", + "wfmindb %f4, %f6, %f8, 1", + )); + + let flags = settings::Flags::new(settings::builder()); + let rru = create_reg_universe(&flags); + let emit_info = EmitInfo::new(flags); + for (insn, expected_encoding, expected_printing) in insns { + println!( + "S390x: {:?}, {}, {}", + insn, expected_encoding, expected_printing + ); + + // Check the printed text is as expected. + let actual_printing = insn.show_rru(Some(&rru)); + assert_eq!(expected_printing, actual_printing); + + let mut sink = test_utils::TestCodeSink::new(); + let mut buffer = MachBuffer::new(); + insn.emit(&mut buffer, &emit_info, &mut Default::default()); + let buffer = buffer.finish(); + buffer.emit(&mut sink); + let actual_encoding = &sink.stringify(); + assert_eq!(expected_encoding, actual_encoding); + } +} diff --git a/cranelift/codegen/src/isa/s390x/inst/imms.rs b/cranelift/codegen/src/isa/s390x/inst/imms.rs new file mode 100644 index 0000000000..b1a459ea68 --- /dev/null +++ b/cranelift/codegen/src/isa/s390x/inst/imms.rs @@ -0,0 +1,231 @@ +//! S390x ISA definitions: immediate constants. + +use regalloc::{PrettyPrint, RealRegUniverse}; +use std::string::String; + +/// An unsigned 12-bit immediate. +#[derive(Clone, Copy, Debug)] +pub struct UImm12 { + /// The value. + value: u16, +} + +impl UImm12 { + pub fn maybe_from_u64(value: u64) -> Option { + if value < 4096 { + Some(UImm12 { + value: value as u16, + }) + } else { + None + } + } + + /// Create a zero immediate of this format. + pub fn zero() -> UImm12 { + UImm12 { value: 0 } + } + + /// Bits for encoding. + pub fn bits(&self) -> u32 { + u32::from(self.value) + } +} + +/// A signed 20-bit immediate. +#[derive(Clone, Copy, Debug)] +pub struct SImm20 { + /// The value. + value: i32, +} + +impl SImm20 { + pub fn maybe_from_i64(value: i64) -> Option { + if value >= -524288 && value < 524288 { + Some(SImm20 { + value: value as i32, + }) + } else { + None + } + } + + pub fn from_uimm12(value: UImm12) -> SImm20 { + SImm20 { + value: value.bits() as i32, + } + } + + /// Create a zero immediate of this format. + pub fn zero() -> SImm20 { + SImm20 { value: 0 } + } + + /// Bits for encoding. + pub fn bits(&self) -> u32 { + let encoded: u32 = self.value as u32; + encoded & 0xfffff + } +} + +/// A 16-bit immediate with a {0,16,32,48}-bit shift. +#[derive(Clone, Copy, Debug)] +pub struct UImm16Shifted { + /// The value. 
+/// A 16-bit immediate with a {0,16,32,48}-bit shift.
+#[derive(Clone, Copy, Debug)]
+pub struct UImm16Shifted {
+    /// The value.
+    pub bits: u16,
+    /// Result is `bits` shifted 16*shift bits to the left.
+    pub shift: u8,
+}
+
+impl UImm16Shifted {
+    /// Construct a UImm16Shifted from an arbitrary 64-bit constant if possible.
+    pub fn maybe_from_u64(value: u64) -> Option<UImm16Shifted> {
+        let mask0 = 0x0000_0000_0000_ffffu64;
+        let mask1 = 0x0000_0000_ffff_0000u64;
+        let mask2 = 0x0000_ffff_0000_0000u64;
+        let mask3 = 0xffff_0000_0000_0000u64;
+
+        if value == (value & mask0) {
+            return Some(UImm16Shifted {
+                bits: (value & mask0) as u16,
+                shift: 0,
+            });
+        }
+        if value == (value & mask1) {
+            return Some(UImm16Shifted {
+                bits: ((value >> 16) & mask0) as u16,
+                shift: 1,
+            });
+        }
+        if value == (value & mask2) {
+            return Some(UImm16Shifted {
+                bits: ((value >> 32) & mask0) as u16,
+                shift: 2,
+            });
+        }
+        if value == (value & mask3) {
+            return Some(UImm16Shifted {
+                bits: ((value >> 48) & mask0) as u16,
+                shift: 3,
+            });
+        }
+        None
+    }
+
+    pub fn maybe_with_shift(imm: u16, shift: u8) -> Option<UImm16Shifted> {
+        let shift_enc = shift / 16;
+        if shift_enc > 3 {
+            None
+        } else {
+            Some(UImm16Shifted {
+                bits: imm,
+                shift: shift_enc,
+            })
+        }
+    }
+
+    pub fn negate_bits(&self) -> UImm16Shifted {
+        UImm16Shifted {
+            bits: !self.bits,
+            shift: self.shift,
+        }
+    }
+
+    /// Returns the value that this constant represents.
+    pub fn value(&self) -> u64 {
+        (self.bits as u64) << (16 * self.shift)
+    }
+}
+
+/// A 32-bit immediate with a {0,32}-bit shift.
+#[derive(Clone, Copy, Debug)]
+pub struct UImm32Shifted {
+    /// The value.
+    pub bits: u32,
+    /// Result is `bits` shifted 32*shift bits to the left.
+    pub shift: u8,
+}
+
+impl UImm32Shifted {
+    /// Construct a UImm32Shifted from an arbitrary 64-bit constant if possible.
+    pub fn maybe_from_u64(value: u64) -> Option<UImm32Shifted> {
+        let mask0 = 0x0000_0000_ffff_ffffu64;
+        let mask1 = 0xffff_ffff_0000_0000u64;
+
+        if value == (value & mask0) {
+            return Some(UImm32Shifted {
+                bits: (value & mask0) as u32,
+                shift: 0,
+            });
+        }
+        if value == (value & mask1) {
+            return Some(UImm32Shifted {
+                bits: ((value >> 32) & mask0) as u32,
+                shift: 1,
+            });
+        }
+        None
+    }
+
+    pub fn maybe_with_shift(imm: u32, shift: u8) -> Option<UImm32Shifted> {
+        let shift_enc = shift / 32;
+        if shift_enc > 3 {
+            None
+        } else {
+            Some(UImm32Shifted {
+                bits: imm,
+                shift: shift_enc,
+            })
+        }
+    }
+
+    pub fn from_uimm16shifted(value: UImm16Shifted) -> UImm32Shifted {
+        if value.shift % 2 == 0 {
+            UImm32Shifted {
+                bits: value.bits as u32,
+                shift: value.shift / 2,
+            }
+        } else {
+            UImm32Shifted {
+                bits: (value.bits as u32) << 16,
+                shift: value.shift / 2,
+            }
+        }
+    }
+
+    pub fn negate_bits(&self) -> UImm32Shifted {
+        UImm32Shifted {
+            bits: !self.bits,
+            shift: self.shift,
+        }
+    }
+
+    /// Returns the value that this constant represents.
+ pub fn value(&self) -> u64 { + (self.bits as u64) << (32 * self.shift) + } +} + +impl PrettyPrint for UImm12 { + fn show_rru(&self, _mb_rru: Option<&RealRegUniverse>) -> String { + format!("{}", self.value) + } +} + +impl PrettyPrint for SImm20 { + fn show_rru(&self, _mb_rru: Option<&RealRegUniverse>) -> String { + format!("{}", self.value) + } +} + +impl PrettyPrint for UImm16Shifted { + fn show_rru(&self, _mb_rru: Option<&RealRegUniverse>) -> String { + format!("{}", self.bits) + } +} + +impl PrettyPrint for UImm32Shifted { + fn show_rru(&self, _mb_rru: Option<&RealRegUniverse>) -> String { + format!("{}", self.bits) + } +} diff --git a/cranelift/codegen/src/isa/s390x/inst/mod.rs b/cranelift/codegen/src/isa/s390x/inst/mod.rs new file mode 100644 index 0000000000..ae4c36815b --- /dev/null +++ b/cranelift/codegen/src/isa/s390x/inst/mod.rs @@ -0,0 +1,3411 @@ +//! This module defines s390x-specific machine instruction types. + +// Some variants are not constructed, but we still want them as options in the future. +#![allow(dead_code)] + +use crate::binemit::CodeOffset; +use crate::ir::{types, ExternalName, Opcode, TrapCode, Type, ValueLabel}; +use crate::isa::unwind::UnwindInst; +use crate::machinst::*; +use crate::{settings, CodegenError, CodegenResult}; + +use regalloc::{PrettyPrint, RegUsageCollector, RegUsageMapper}; +use regalloc::{RealRegUniverse, Reg, RegClass, SpillSlot, VirtualReg, Writable}; + +use alloc::boxed::Box; +use alloc::vec::Vec; +use core::convert::TryFrom; +use smallvec::{smallvec, SmallVec}; +use std::string::{String, ToString}; + +pub mod regs; +pub use self::regs::*; +pub mod imms; +pub use self::imms::*; +pub mod args; +pub use self::args::*; +pub mod emit; +pub use self::emit::*; +pub mod unwind; + +#[cfg(test)] +mod emit_tests; + +//============================================================================= +// Instructions (top level): definition + +/// An ALU operation. This can be paired with several instruction formats +/// below (see `Inst`) in any combination. +#[derive(Copy, Clone, Debug, PartialEq, Eq, Hash)] +pub enum ALUOp { + Add32, + Add32Ext16, + Add64, + Add64Ext16, + Add64Ext32, + Sub32, + Sub32Ext16, + Sub64, + Sub64Ext16, + Sub64Ext32, + Mul32, + Mul32Ext16, + Mul64, + Mul64Ext16, + Mul64Ext32, + And32, + And64, + Orr32, + Orr64, + Xor32, + Xor64, + /// NAND + AndNot32, + AndNot64, + /// NOR + OrrNot32, + OrrNot64, + /// XNOR + XorNot32, + XorNot64, +} + +#[derive(Copy, Clone, Debug, PartialEq, Eq, Hash)] +pub enum UnaryOp { + Abs32, + Abs64, + Abs64Ext32, + Neg32, + Neg64, + Neg64Ext32, + PopcntByte, + PopcntReg, +} + +#[derive(Copy, Clone, Debug, PartialEq, Eq, Hash)] +pub enum ShiftOp { + RotL32, + RotL64, + LShL32, + LShL64, + LShR32, + LShR64, + AShR32, + AShR64, +} + +/// An integer comparison operation. +#[derive(Copy, Clone, Debug, PartialEq, Eq, Hash)] +pub enum CmpOp { + CmpS32, + CmpS32Ext16, + CmpS64, + CmpS64Ext16, + CmpS64Ext32, + CmpL32, + CmpL32Ext16, + CmpL64, + CmpL64Ext16, + CmpL64Ext32, +} + +/// A floating-point unit (FPU) operation with one arg. +#[derive(Copy, Clone, Debug, PartialEq, Eq, Hash)] +pub enum FPUOp1 { + Abs32, + Abs64, + Neg32, + Neg64, + NegAbs32, + NegAbs64, + Sqrt32, + Sqrt64, + Cvt32To64, + Cvt64To32, +} + +/// A floating-point unit (FPU) operation with two args. +#[derive(Copy, Clone, Debug, PartialEq, Eq, Hash)] +pub enum FPUOp2 { + Add32, + Add64, + Sub32, + Sub64, + Mul32, + Mul64, + Div32, + Div64, + Max32, + Max64, + Min32, + Min64, +} + +/// A floating-point unit (FPU) operation with three args. 
+#[derive(Copy, Clone, Debug, PartialEq, Eq, Hash)]
+pub enum FPUOp3 {
+    MAdd32,
+    MAdd64,
+    MSub32,
+    MSub64,
+}
+
+/// A conversion from an FP to an integer value.
+#[derive(Copy, Clone, Debug, PartialEq, Eq, Hash)]
+pub enum FpuToIntOp {
+    F32ToU32,
+    F32ToI32,
+    F32ToU64,
+    F32ToI64,
+    F64ToU32,
+    F64ToI32,
+    F64ToU64,
+    F64ToI64,
+}
+
+/// A conversion from an integer to an FP value.
+#[derive(Copy, Clone, Debug, PartialEq, Eq, Hash)]
+pub enum IntToFpuOp {
+    U32ToF32,
+    I32ToF32,
+    U32ToF64,
+    I32ToF64,
+    U64ToF32,
+    I64ToF32,
+    U64ToF64,
+    I64ToF64,
+}
+
+/// Modes for FP rounding ops: round down (floor) or up (ceil), or toward zero (trunc), or to
+/// nearest, and for 32- or 64-bit FP values.
+#[derive(Copy, Clone, Debug, PartialEq, Eq, Hash)]
+pub enum FpuRoundMode {
+    Minus32,
+    Minus64,
+    Plus32,
+    Plus64,
+    Zero32,
+    Zero64,
+    Nearest32,
+    Nearest64,
+}
+
+/// Additional information for (direct) Call instructions, left out of line to lower the size of
+/// the Inst enum.
+#[derive(Clone, Debug)]
+pub struct CallInfo {
+    pub dest: ExternalName,
+    pub uses: Vec<Reg>,
+    pub defs: Vec<Writable<Reg>>,
+    pub opcode: Opcode,
+}
+
+/// Additional information for CallInd instructions, left out of line to lower the size of the Inst
+/// enum.
+#[derive(Clone, Debug)]
+pub struct CallIndInfo {
+    pub rn: Reg,
+    pub uses: Vec<Reg>,
+    pub defs: Vec<Writable<Reg>>,
+    pub opcode: Opcode,
+}
+
+/// Additional information for JTSequence instructions, left out of line to lower the size of the Inst
+/// enum.
+#[derive(Clone, Debug)]
+pub struct JTSequenceInfo {
+    pub default_target: BranchTarget,
+    pub targets: Vec<BranchTarget>,
+    pub targets_for_term: Vec<MachLabel>, // needed for MachTerminator.
+}
+
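+// Illustrative note, not part of the original patch: these out-of-line info
+// structs are boxed where they appear in `Inst` (`Box<CallInfo>`,
+// `Box<CallIndInfo>`, `Box<JTSequenceInfo>`), so a large payload on a rarely
+// used variant does not grow every `Inst`; `inst_size_test` below pins the
+// enum at 32 bytes.
+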
+/// Instruction formats.
+#[derive(Clone, Debug)]
+pub enum Inst {
+    /// A no-op of zero size.
+    Nop0,
+
+    /// A no-op of size two bytes.
+    Nop2,
+
+    /// An ALU operation with two register sources and a register destination.
+    AluRRR {
+        alu_op: ALUOp,
+        rd: Writable<Reg>,
+        rn: Reg,
+        rm: Reg,
+    },
+    /// An ALU operation with a register source and a signed 16-bit
+    /// immediate source, and a separate register destination.
+    AluRRSImm16 {
+        alu_op: ALUOp,
+        rd: Writable<Reg>,
+        rn: Reg,
+        imm: i16,
+    },
+    /// An ALU operation with a register in-/out operand and
+    /// a second register source.
+    AluRR {
+        alu_op: ALUOp,
+        rd: Writable<Reg>,
+        rm: Reg,
+    },
+    /// An ALU operation with a register in-/out operand and
+    /// a memory source.
+    AluRX {
+        alu_op: ALUOp,
+        rd: Writable<Reg>,
+        mem: MemArg,
+    },
+    /// An ALU operation with a register in-/out operand and a signed 16-bit
+    /// immediate source.
+    AluRSImm16 {
+        alu_op: ALUOp,
+        rd: Writable<Reg>,
+        imm: i16,
+    },
+    /// An ALU operation with a register in-/out operand and a signed 32-bit
+    /// immediate source.
+    AluRSImm32 {
+        alu_op: ALUOp,
+        rd: Writable<Reg>,
+        imm: i32,
+    },
+    /// An ALU operation with a register in-/out operand and an unsigned 32-bit
+    /// immediate source.
+    AluRUImm32 {
+        alu_op: ALUOp,
+        rd: Writable<Reg>,
+        imm: u32,
+    },
+    /// An ALU operation with a register in-/out operand and a shifted 16-bit
+    /// immediate source.
+    AluRUImm16Shifted {
+        alu_op: ALUOp,
+        rd: Writable<Reg>,
+        imm: UImm16Shifted,
+    },
+    /// An ALU operation with a register in-/out operand and a shifted 32-bit
+    /// immediate source.
+    AluRUImm32Shifted {
+        alu_op: ALUOp,
+        rd: Writable<Reg>,
+        imm: UImm32Shifted,
+    },
+    /// A multiply operation with two register sources and a register pair destination.
+    /// FIXME: The pair is hard-coded as %r0/%r1 because regalloc cannot handle pairs.
+    SMulWide {
+        rn: Reg,
+        rm: Reg,
+    },
+    /// A multiply operation with an in/out register pair, and an extra register source.
+    /// Only the lower half of the register pair is used as input.
+    /// FIXME: The pair is hard-coded as %r0/%r1 because regalloc cannot handle pairs.
+    UMulWide {
+        rn: Reg,
+    },
+    /// A divide operation with an in/out register pair, and an extra register source.
+    /// Only the lower half of the register pair is used as input.
+    /// FIXME: The pair is hard-coded as %r0/%r1 because regalloc cannot handle pairs.
+    SDivMod32 {
+        rn: Reg,
+    },
+    SDivMod64 {
+        rn: Reg,
+    },
+    /// A divide operation with an in/out register pair, and an extra register source.
+    /// FIXME: The pair is hard-coded as %r0/%r1 because regalloc cannot handle pairs.
+    UDivMod32 {
+        rn: Reg,
+    },
+    UDivMod64 {
+        rn: Reg,
+    },
+    /// A FLOGR operation with a register source and a register pair destination.
+    /// FIXME: The pair is hard-coded as %r0/%r1 because regalloc cannot handle pairs.
+    Flogr {
+        rn: Reg,
+    },
+
+    /// A shift instruction with a register source, a register destination,
+    /// and an immediate plus an optional register as shift count.
+    ShiftRR {
+        shift_op: ShiftOp,
+        rd: Writable<Reg>,
+        rn: Reg,
+        shift_imm: SImm20,
+        shift_reg: Option<Reg>,
+    },
+
+    /// A unary operation with a register source and a register destination.
+    UnaryRR {
+        op: UnaryOp,
+        rd: Writable<Reg>,
+        rn: Reg,
+    },
+
+    /// A compare operation with two register sources.
+    CmpRR {
+        op: CmpOp,
+        rn: Reg,
+        rm: Reg,
+    },
+    /// A compare operation with a register source and a memory source.
+    CmpRX {
+        op: CmpOp,
+        rn: Reg,
+        mem: MemArg,
+    },
+    /// A compare operation with a register source and a signed 16-bit
+    /// immediate source.
+    CmpRSImm16 {
+        op: CmpOp,
+        rn: Reg,
+        imm: i16,
+    },
+    /// A compare operation with a register source and a signed 32-bit
+    /// immediate source.
+    CmpRSImm32 {
+        op: CmpOp,
+        rn: Reg,
+        imm: i32,
+    },
+    /// A compare operation with a register source and an unsigned 32-bit
+    /// immediate source.
+    CmpRUImm32 {
+        op: CmpOp,
+        rn: Reg,
+        imm: u32,
+    },
+    /// A compare-and-trap instruction with two register sources.
+    CmpTrapRR {
+        op: CmpOp,
+        rn: Reg,
+        rm: Reg,
+        cond: Cond,
+        trap_code: TrapCode,
+    },
+    /// A compare-and-trap operation with a register source and a signed 16-bit
+    /// immediate source.
+    CmpTrapRSImm16 {
+        op: CmpOp,
+        rn: Reg,
+        imm: i16,
+        cond: Cond,
+        trap_code: TrapCode,
+    },
+    /// A compare-and-trap operation with a register source and an unsigned 16-bit
+    /// immediate source.
+    CmpTrapRUImm16 {
+        op: CmpOp,
+        rn: Reg,
+        imm: u16,
+        cond: Cond,
+        trap_code: TrapCode,
+    },
+
+    /// A 32-bit load.
+    Load32 {
+        rd: Writable<Reg>,
+        mem: MemArg,
+    },
+    /// An unsigned (zero-extending) 8-bit to 32-bit load.
+    Load32ZExt8 {
+        rd: Writable<Reg>,
+        mem: MemArg,
+    },
+    /// A signed (sign-extending) 8-bit to 32-bit load.
+    Load32SExt8 {
+        rd: Writable<Reg>,
+        mem: MemArg,
+    },
+    /// An unsigned (zero-extending) 16-bit to 32-bit load.
+    Load32ZExt16 {
+        rd: Writable<Reg>,
+        mem: MemArg,
+    },
+    /// A signed (sign-extending) 16-bit to 32-bit load.
+    Load32SExt16 {
+        rd: Writable<Reg>,
+        mem: MemArg,
+    },
+    /// A 64-bit load.
+    Load64 {
+        rd: Writable<Reg>,
+        mem: MemArg,
+    },
+    /// An unsigned (zero-extending) 8-bit to 64-bit load.
+    Load64ZExt8 {
+        rd: Writable<Reg>,
+        mem: MemArg,
+    },
+    /// A signed (sign-extending) 8-bit to 64-bit load.
+    Load64SExt8 {
+        rd: Writable<Reg>,
+        mem: MemArg,
+    },
+    /// An unsigned (zero-extending) 16-bit to 64-bit load.
+    Load64ZExt16 {
+        rd: Writable<Reg>,
+        mem: MemArg,
+    },
+    /// A signed (sign-extending) 16-bit to 64-bit load.
+    Load64SExt16 {
+        rd: Writable<Reg>,
+        mem: MemArg,
+    },
+    /// An unsigned (zero-extending) 32-bit to 64-bit load.
+    Load64ZExt32 {
+        rd: Writable<Reg>,
+        mem: MemArg,
+    },
+    /// A signed (sign-extending) 32-bit to 64-bit load.
+    Load64SExt32 {
+        rd: Writable<Reg>,
+        mem: MemArg,
+    },
+
+    /// A 16-bit byte-reversed load.
+    LoadRev16 {
+        rd: Writable<Reg>,
+        mem: MemArg,
+    },
+    /// A 32-bit byte-reversed load.
+    LoadRev32 {
+        rd: Writable<Reg>,
+        mem: MemArg,
+    },
+    /// A 64-bit byte-reversed load.
+    LoadRev64 {
+        rd: Writable<Reg>,
+        mem: MemArg,
+    },
+
+    /// An 8-bit store.
+    Store8 {
+        rd: Reg,
+        mem: MemArg,
+    },
+    /// A 16-bit store.
+    Store16 {
+        rd: Reg,
+        mem: MemArg,
+    },
+    /// A 32-bit store.
+    Store32 {
+        rd: Reg,
+        mem: MemArg,
+    },
+    /// A 64-bit store.
+    Store64 {
+        rd: Reg,
+        mem: MemArg,
+    },
+    /// An 8-bit store of an immediate.
+    StoreImm8 {
+        imm: u8,
+        mem: MemArg,
+    },
+    /// A 16-bit store of an immediate.
+    StoreImm16 {
+        imm: i16,
+        mem: MemArg,
+    },
+    /// A 32-bit store of a sign-extended 16-bit immediate.
+    StoreImm32SExt16 {
+        imm: i16,
+        mem: MemArg,
+    },
+    /// A 64-bit store of a sign-extended 16-bit immediate.
+    StoreImm64SExt16 {
+        imm: i16,
+        mem: MemArg,
+    },
+
+    /// A 16-bit byte-reversed store.
+    StoreRev16 {
+        rd: Reg,
+        mem: MemArg,
+    },
+    /// A 32-bit byte-reversed store.
+    StoreRev32 {
+        rd: Reg,
+        mem: MemArg,
+    },
+    /// A 64-bit byte-reversed store.
+    StoreRev64 {
+        rd: Reg,
+        mem: MemArg,
+    },
+
+    /// A load-multiple instruction.
+    LoadMultiple64 {
+        rt: Writable<Reg>,
+        rt2: Writable<Reg>,
+        addr_reg: Reg,
+        addr_off: SImm20,
+    },
+    /// A store-multiple instruction.
+    StoreMultiple64 {
+        rt: Reg,
+        rt2: Reg,
+        addr_reg: Reg,
+        addr_off: SImm20,
+    },
+
+    /// A 32-bit move instruction.
+    Mov32 {
+        rd: Writable<Reg>,
+        rm: Reg,
+    },
+    /// A 64-bit move instruction.
+    Mov64 {
+        rd: Writable<Reg>,
+        rm: Reg,
+    },
+    /// A 32-bit move instruction with a full 32-bit immediate.
+    Mov32Imm {
+        rd: Writable<Reg>,
+        imm: u32,
+    },
+    /// A 32-bit move instruction with a 16-bit signed immediate.
+    Mov32SImm16 {
+        rd: Writable<Reg>,
+        imm: i16,
+    },
+    /// A 64-bit move instruction with a 16-bit signed immediate.
+    Mov64SImm16 {
+        rd: Writable<Reg>,
+        imm: i16,
+    },
+    /// A 64-bit move instruction with a 32-bit signed immediate.
+    Mov64SImm32 {
+        rd: Writable<Reg>,
+        imm: i32,
+    },
+    /// A 64-bit move instruction with a shifted 16-bit immediate.
+    Mov64UImm16Shifted {
+        rd: Writable<Reg>,
+        imm: UImm16Shifted,
+    },
+    /// A 64-bit move instruction with a shifted 32-bit immediate.
+    Mov64UImm32Shifted {
+        rd: Writable<Reg>,
+        imm: UImm32Shifted,
+    },
+
+    /// A 64-bit insert instruction with a shifted 16-bit immediate.
+    Insert64UImm16Shifted {
+        rd: Writable<Reg>,
+        imm: UImm16Shifted,
+    },
+    /// A 64-bit insert instruction with a shifted 32-bit immediate.
+    Insert64UImm32Shifted {
+        rd: Writable<Reg>,
+        imm: UImm32Shifted,
+    },
+
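+    // Illustrative note, not part of the original patch: together, the
+    // Mov64UImm*Shifted and Insert64UImm*Shifted forms let lowering build any
+    // 64-bit constant without a literal pool. The emission tests above show
+    // the pattern for 0x7fff_ffff_ffff_ffff:
+    // `llihf %r1, 2147483647 ; iilf %r1, 4294967295` -- a move that clears
+    // the register while setting the high word, then an insert into the low
+    // word.
+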
+    /// A sign- or zero-extend operation.
+    Extend {
+        rd: Writable<Reg>,
+        rn: Reg,
+        signed: bool,
+        from_bits: u8,
+        to_bits: u8,
+    },
+
+    /// A 32-bit conditional move instruction.
+    CMov32 {
+        rd: Writable<Reg>,
+        cond: Cond,
+        rm: Reg,
+    },
+    /// A 64-bit conditional move instruction.
+    CMov64 {
+        rd: Writable<Reg>,
+        cond: Cond,
+        rm: Reg,
+    },
+    /// A 32-bit conditional move instruction with a 16-bit signed immediate.
+    CMov32SImm16 {
+        rd: Writable<Reg>,
+        cond: Cond,
+        imm: i16,
+    },
+    /// A 64-bit conditional move instruction with a 16-bit signed immediate.
+    CMov64SImm16 {
+        rd: Writable<Reg>,
+        cond: Cond,
+        imm: i16,
+    },
+
+    /// 32-bit FPU move.
+    FpuMove32 {
+        rd: Writable<Reg>,
+        rn: Reg,
+    },
+    /// 64-bit FPU move.
+    FpuMove64 {
+        rd: Writable<Reg>,
+        rn: Reg,
+    },
+
+    /// A 32-bit conditional move FPU instruction.
+    FpuCMov32 {
+        rd: Writable<Reg>,
+        cond: Cond,
+        rm: Reg,
+    },
+    /// A 64-bit conditional move FPU instruction.
+    FpuCMov64 {
+        rd: Writable<Reg>,
+        cond: Cond,
+        rm: Reg,
+    },
+
+    /// A 64-bit move instruction from GPR to FPR.
+    MovToFpr {
+        rd: Writable<Reg>,
+        rn: Reg,
+    },
+    /// A 64-bit move instruction from FPR to GPR.
+    MovFromFpr {
+        rd: Writable<Reg>,
+        rn: Reg,
+    },
+
+    /// 1-op FPU instruction.
+    FpuRR {
+        fpu_op: FPUOp1,
+        rd: Writable<Reg>,
+        rn: Reg,
+    },
+
+    /// 2-op FPU instruction.
+    FpuRRR {
+        fpu_op: FPUOp2,
+        rd: Writable<Reg>,
+        rm: Reg,
+    },
+
+    /// 3-op FPU instruction.
+    FpuRRRR {
+        fpu_op: FPUOp3,
+        rd: Writable<Reg>,
+        rn: Reg,
+        rm: Reg,
+    },
+
+    /// FPU copy sign instruction.
+    FpuCopysign {
+        rd: Writable<Reg>,
+        rn: Reg,
+        rm: Reg,
+    },
+
+    /// FPU comparison, single-precision (32 bit).
+    FpuCmp32 {
+        rn: Reg,
+        rm: Reg,
+    },
+
+    /// FPU comparison, double-precision (64 bit).
+    FpuCmp64 {
+        rn: Reg,
+        rm: Reg,
+    },
+
+    /// Floating-point load, single-precision (32 bit).
+    FpuLoad32 {
+        rd: Writable<Reg>,
+        mem: MemArg,
+    },
+    /// Floating-point store, single-precision (32 bit).
+    FpuStore32 {
+        rd: Reg,
+        mem: MemArg,
+    },
+    /// Floating-point load, double-precision (64 bit).
+    FpuLoad64 {
+        rd: Writable<Reg>,
+        mem: MemArg,
+    },
+    /// Floating-point store, double-precision (64 bit).
+    FpuStore64 {
+        rd: Reg,
+        mem: MemArg,
+    },
+    /// Floating-point byte-reversed load, single-precision (32 bit).
+    FpuLoadRev32 {
+        rd: Writable<Reg>,
+        mem: MemArg,
+    },
+    /// Floating-point byte-reversed store, single-precision (32 bit).
+    FpuStoreRev32 {
+        rd: Reg,
+        mem: MemArg,
+    },
+    /// Floating-point byte-reversed load, double-precision (64 bit).
+    FpuLoadRev64 {
+        rd: Writable<Reg>,
+        mem: MemArg,
+    },
+    /// Floating-point byte-reversed store, double-precision (64 bit).
+    FpuStoreRev64 {
+        rd: Reg,
+        mem: MemArg,
+    },
+
+    LoadFpuConst32 {
+        rd: Writable<Reg>,
+        const_data: f32,
+    },
+
+    LoadFpuConst64 {
+        rd: Writable<Reg>,
+        const_data: f64,
+    },
+
+    /// Conversion: FP -> integer.
+    FpuToInt {
+        op: FpuToIntOp,
+        rd: Writable<Reg>,
+        rn: Reg,
+    },
+
+    /// Conversion: integer -> FP.
+    IntToFpu {
+        op: IntToFpuOp,
+        rd: Writable<Reg>,
+        rn: Reg,
+    },
+
+    /// Round to integer.
+    FpuRound {
+        op: FpuRoundMode,
+        rd: Writable<Reg>,
+        rn: Reg,
+    },
+
+    /// 2-op FPU instruction implemented as vector instruction with the W bit.
+    FpuVecRRR {
+        fpu_op: FPUOp2,
+        rd: Writable<Reg>,
+        rn: Reg,
+        rm: Reg,
+    },
+
+    /// A machine call instruction.
+    Call {
+        link: Writable<Reg>,
+        info: Box<CallInfo>,
+    },
+    /// A machine indirect-call instruction.
+    CallInd {
+        link: Writable<Reg>,
+        info: Box<CallIndInfo>,
+    },
+
+    // ---- branches (exactly one must appear at end of BB) ----
+    /// A machine return instruction.
+    Ret {
+        link: Reg,
+    },
+
+    /// A placeholder instruction, generating no code, meaning that a function epilogue must be
+    /// inserted there.
+    EpiloguePlaceholder,
+
+    /// An unconditional branch.
+    Jump {
+        dest: BranchTarget,
+    },
+
+    /// A conditional branch. Contains two targets; at emission time, both are emitted, but
+    /// the MachBuffer knows to truncate the trailing branch if fallthrough. We optimize the
+    /// choice of taken/not_taken (inverting the branch polarity as needed) based on the
+    /// fallthrough at the time of lowering.
+    CondBr {
+        taken: BranchTarget,
+        not_taken: BranchTarget,
+        cond: Cond,
+    },
+
+    /// A conditional trap: execute a `Trap` if the condition is true. This is
+    /// one VCode instruction because it uses embedded control flow; it is
+    /// logically a single-in, single-out region, but needs to appear as one
+    /// unit to the register allocator.
+    ///
+    /// The `Cond` gives the conditional-branch condition that will
+    /// *execute* the embedded `Trap`. (In the emitted code, we use the inverse
+    /// of this condition in a branch that skips the trap instruction.)
+    TrapIf {
+        cond: Cond,
+        trap_code: TrapCode,
+    },
+
+    /// A one-way conditional branch, invisible to the CFG processing; used *only* as part of
+    /// straight-line sequences in code to be emitted.
+    ///
+    /// In more detail:
+    /// - This branch is lowered to a branch at the machine-code level, but does not end a basic
+    ///   block, and does not create edges in the CFG seen by regalloc.
+    /// - Thus, it is *only* valid to use as part of a single-in, single-out sequence that is
+    ///   lowered from a single CLIF instruction. For example, certain arithmetic operations may
+    ///   use these branches to handle certain conditions, such as overflows, traps, etc.
+    ///
+    /// See, e.g., the lowering of `trapif` (conditional trap) for an example.
+    OneWayCondBr {
+        target: BranchTarget,
+        cond: Cond,
+    },
+
+    /// An indirect branch through a register, augmented with the set of all
+    /// possible successors.
+    IndirectBr {
+        rn: Reg,
+        targets: Vec<MachLabel>,
+    },
+
+    /// A "debugtrap" instruction, used for e.g. traps and debug breakpoints.
+    Debugtrap,
+
+    /// An instruction guaranteed to always be undefined and to trigger an illegal instruction at
+    /// runtime.
+    Trap {
+        trap_code: TrapCode,
+    },
+
+    /// Jump-table sequence, as one compound instruction (see note in lower.rs
+    /// for rationale).
+    JTSequence {
+        info: Box<JTSequenceInfo>,
+        ridx: Reg,
+        rtmp1: Writable<Reg>,
+        rtmp2: Writable<Reg>,
+    },
+
+    /// Load an inline symbol reference with RelocDistance::Far.
+    LoadExtNameFar {
+        rd: Writable<Reg>,
+        name: Box<ExternalName>,
+        offset: i64,
+    },
+
+    /// Load address referenced by `mem` into `rd`.
+    LoadAddr {
+        rd: Writable<Reg>,
+        mem: MemArg,
+    },
+
+    /// Marker, no-op in generated code: SP "virtual offset" is adjusted. This
+    /// controls how MemArg::NominalSPOffset args are lowered.
+    VirtualSPOffsetAdj {
+        offset: i64,
+    },
+
+    /// A definition of a value label.
+    ValueLabelMarker {
+        reg: Reg,
+        label: ValueLabel,
+    },
+
+    /// An unwind pseudoinstruction describing the state of the
+    /// machine at this program point.
+    Unwind {
+        inst: UnwindInst,
+    },
+}
+
+fn count_zero_half_words(mut value: u64) -> usize {
+    let mut count = 0;
+    for _ in 0..4 {
+        if value & 0xffff == 0 {
+            count += 1;
+        }
+        value >>= 16;
+    }
+
+    count
+}
+
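+// Illustrative sketch, not part of the original patch; the module name is
+// hypothetical, and the stated purpose is an inference: counting zero
+// halfwords tells the constant-materialization code how cheap a value is to
+// build from halfword moves and inserts -- the more halfwords are zero, the
+// fewer instructions are needed.
+#[cfg(test)]
+mod count_zero_half_words_sketch {
+    use super::count_zero_half_words;
+
+    #[test]
+    fn counts_zero_halfwords() {
+        // All four halfwords zero.
+        assert_eq!(count_zero_half_words(0), 4);
+        // Only the low halfword is set.
+        assert_eq!(count_zero_half_words(0x0000_0000_0000_ffff), 3);
+        // Alternating halfwords.
+        assert_eq!(count_zero_half_words(0xffff_0000_ffff_0000), 2);
+        // No zero halfwords at all.
+        assert_eq!(count_zero_half_words(u64::MAX), 0);
+    }
+}
+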
+ CMov64SImm16 { + rd: Writable, + cond: Cond, + imm: i16, + }, + + /// 32-bit FPU move. + FpuMove32 { + rd: Writable, + rn: Reg, + }, + /// 64-bit FPU move. + FpuMove64 { + rd: Writable, + rn: Reg, + }, + + /// A 32-bit conditional move FPU instruction. + FpuCMov32 { + rd: Writable, + cond: Cond, + rm: Reg, + }, + /// A 64-bit conditional move FPU instruction. + FpuCMov64 { + rd: Writable, + cond: Cond, + rm: Reg, + }, + + /// A 64-bit move instruction from GPR to FPR. + MovToFpr { + rd: Writable, + rn: Reg, + }, + /// A 64-bit move instruction from FPR to GPR. + MovFromFpr { + rd: Writable, + rn: Reg, + }, + + /// 1-op FPU instruction. + FpuRR { + fpu_op: FPUOp1, + rd: Writable, + rn: Reg, + }, + + /// 2-op FPU instruction. + FpuRRR { + fpu_op: FPUOp2, + rd: Writable, + rm: Reg, + }, + + /// 3-op FPU instruction. + FpuRRRR { + fpu_op: FPUOp3, + rd: Writable, + rn: Reg, + rm: Reg, + }, + + /// FPU copy sign instruction. + FpuCopysign { + rd: Writable, + rn: Reg, + rm: Reg, + }, + + /// FPU comparison, single-precision (32 bit). + FpuCmp32 { + rn: Reg, + rm: Reg, + }, + + /// FPU comparison, double-precision (64 bit). + FpuCmp64 { + rn: Reg, + rm: Reg, + }, + + /// Floating-point load, single-precision (32 bit). + FpuLoad32 { + rd: Writable, + mem: MemArg, + }, + /// Floating-point store, single-precision (32 bit). + FpuStore32 { + rd: Reg, + mem: MemArg, + }, + /// Floating-point load, double-precision (64 bit). + FpuLoad64 { + rd: Writable, + mem: MemArg, + }, + /// Floating-point store, double-precision (64 bit). + FpuStore64 { + rd: Reg, + mem: MemArg, + }, + /// Floating-point byte-reversed load, single-precision (32 bit). + FpuLoadRev32 { + rd: Writable, + mem: MemArg, + }, + /// Floating-point byte-reversed store, single-precision (32 bit). + FpuStoreRev32 { + rd: Reg, + mem: MemArg, + }, + /// Floating-point byte-reversed load, double-precision (64 bit). + FpuLoadRev64 { + rd: Writable, + mem: MemArg, + }, + /// Floating-point byte-reversed store, double-precision (64 bit). + FpuStoreRev64 { + rd: Reg, + mem: MemArg, + }, + + LoadFpuConst32 { + rd: Writable, + const_data: f32, + }, + + LoadFpuConst64 { + rd: Writable, + const_data: f64, + }, + + /// Conversion: FP -> integer. + FpuToInt { + op: FpuToIntOp, + rd: Writable, + rn: Reg, + }, + + /// Conversion: integer -> FP. + IntToFpu { + op: IntToFpuOp, + rd: Writable, + rn: Reg, + }, + + /// Round to integer. + FpuRound { + op: FpuRoundMode, + rd: Writable, + rn: Reg, + }, + + /// 2-op FPU instruction implemented as vector instruction with the W bit. + FpuVecRRR { + fpu_op: FPUOp2, + rd: Writable, + rn: Reg, + rm: Reg, + }, + + /// A machine call instruction. + Call { + link: Writable, + info: Box, + }, + /// A machine indirect-call instruction. + CallInd { + link: Writable, + info: Box, + }, + + // ---- branches (exactly one must appear at end of BB) ---- + /// A machine return instruction. + Ret { + link: Reg, + }, + + /// A placeholder instruction, generating no code, meaning that a function epilogue must be + /// inserted there. + EpiloguePlaceholder, + + /// An unconditional branch. + Jump { + dest: BranchTarget, + }, + + /// A conditional branch. Contains two targets; at emission time, both are emitted, but + /// the MachBuffer knows to truncate the trailing branch if fallthrough. We optimize the + /// choice of taken/not_taken (inverting the branch polarity as needed) based on the + /// fallthrough at the time of lowering. 
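+ // For example, a two-way conditional branch is emitted as
+ //
+ //     jg{cond} taken ; jg not_taken
+ //
+ // and the trailing `jg not_taken` is truncated by the MachBuffer whenever
+ // `not_taken` is the fall-through block.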
+ CondBr {
+ taken: BranchTarget,
+ not_taken: BranchTarget,
+ cond: Cond,
+ },
+
+ /// A conditional trap: execute a `Trap` if the condition is true. This is
+ /// one VCode instruction because it uses embedded control flow; it is
+ /// logically a single-in, single-out region, but needs to appear as one
+ /// unit to the register allocator.
+ ///
+ /// The `Cond` gives the conditional-branch condition that will
+ /// *execute* the embedded `Trap`. (In the emitted code, we use the inverse
+ /// of this condition in a branch that skips the trap instruction.)
+ TrapIf {
+ cond: Cond,
+ trap_code: TrapCode,
+ },
+
+ /// A one-way conditional branch, invisible to the CFG processing; used *only* as part of
+ /// straight-line sequences in code to be emitted.
+ ///
+ /// In more detail:
+ /// - This branch is lowered to a branch at the machine-code level, but does not end a basic
+ /// block, and does not create edges in the CFG seen by regalloc.
+ /// - Thus, it is *only* valid to use as part of a single-in, single-out sequence that is
+ /// lowered from a single CLIF instruction. For example, certain arithmetic operations may
+ /// use these branches to handle certain conditions, such as overflows, traps, etc.
+ ///
+ /// See, e.g., the lowering of `trapif` (conditional trap) for an example.
+ OneWayCondBr {
+ target: BranchTarget,
+ cond: Cond,
+ },
+
+ /// An indirect branch through a register, augmented with a set of all
+ /// possible successors.
+ IndirectBr {
+ rn: Reg,
+ targets: Vec<MachLabel>,
+ },
+
+ /// A "debugtrap" instruction, used for e.g. traps and debug breakpoints.
+ Debugtrap,
+
+ /// An instruction guaranteed to always be undefined and to trigger an illegal instruction at
+ /// runtime.
+ Trap {
+ trap_code: TrapCode,
+ },
+
+ /// Jump-table sequence, as one compound instruction (see note in lower.rs
+ /// for rationale).
+ JTSequence {
+ info: Box<JTSequenceInfo>,
+ ridx: Reg,
+ rtmp1: Writable<Reg>,
+ rtmp2: Writable<Reg>,
+ },
+
+ /// Load an inline symbol reference with RelocDistance::Far.
+ LoadExtNameFar {
+ rd: Writable<Reg>,
+ name: Box<ExternalName>,
+ offset: i64,
+ },
+
+ /// Load address referenced by `mem` into `rd`.
+ LoadAddr {
+ rd: Writable<Reg>,
+ mem: MemArg,
+ },
+
+ /// Marker, no-op in generated code: SP "virtual offset" is adjusted. This
+ /// controls how MemArg::NominalSPOffset args are lowered.
+ VirtualSPOffsetAdj {
+ offset: i64,
+ },
+
+ /// A definition of a value label.
+ ValueLabelMarker {
+ reg: Reg,
+ label: ValueLabel,
+ },
+
+ /// An unwind pseudoinstruction describing the state of the
+ /// machine at this program point.
+ Unwind {
+ inst: UnwindInst,
+ },
+}
+
+fn count_zero_half_words(mut value: u64) -> usize {
+ let mut count = 0;
+ for _ in 0..4 {
+ if value & 0xffff == 0 {
+ count += 1;
+ }
+ value >>= 16;
+ }
+
+ count
+}
+
+#[test]
+fn inst_size_test() {
+ // This test helps catch unintentional growth of the size
+ // of the Inst enum.
+ assert_eq!(32, std::mem::size_of::<Inst>());
+}
+
+impl Inst {
+ /// Create a 64-bit move instruction.
+ pub fn mov64(to_reg: Writable<Reg>, from_reg: Reg) -> Inst {
+ assert!(to_reg.to_reg().get_class() == from_reg.get_class());
+ if from_reg.get_class() == RegClass::I64 {
+ Inst::Mov64 {
+ rd: to_reg,
+ rm: from_reg,
+ }
+ } else {
+ Inst::FpuMove64 {
+ rd: to_reg,
+ rn: from_reg,
+ }
+ }
+ }
+
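+ // Illustrative sketch, not part of this change: `mov64` dispatches on the
+ // source register class, so callers need not distinguish GPR and FPR moves:
+ //
+ //     let mv = Inst::mov64(writable_gpr(2), gpr(3)); // prints as `lgr %r2, %r3`
+ //
+ /// Create a 32-bit move instruction.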
+ pub fn mov32(to_reg: Writable, from_reg: Reg) -> Inst { + if from_reg.get_class() == RegClass::I64 { + Inst::Mov32 { + rd: to_reg, + rm: from_reg, + } + } else { + Inst::FpuMove32 { + rd: to_reg, + rn: from_reg, + } + } + } + + /// Create an instruction that loads a 64-bit integer constant. + pub fn load_constant64(rd: Writable, value: u64) -> SmallVec<[Inst; 4]> { + if let Ok(imm) = i16::try_from(value as i64) { + // 16-bit signed immediate + smallvec![Inst::Mov64SImm16 { rd, imm }] + } else if let Ok(imm) = i32::try_from(value as i64) { + // 32-bit signed immediate + smallvec![Inst::Mov64SImm32 { rd, imm }] + } else if let Some(imm) = UImm16Shifted::maybe_from_u64(value) { + // 16-bit shifted immediate + smallvec![Inst::Mov64UImm16Shifted { rd, imm }] + } else if let Some(imm) = UImm32Shifted::maybe_from_u64(value) { + // 32-bit shifted immediate + smallvec![Inst::Mov64UImm32Shifted { rd, imm }] + } else { + let mut insts = smallvec![]; + let hi = value & 0xffff_ffff_0000_0000u64; + let lo = value & 0x0000_0000_ffff_ffffu64; + + if let Some(imm) = UImm16Shifted::maybe_from_u64(hi) { + // 16-bit shifted immediate + insts.push(Inst::Mov64UImm16Shifted { rd, imm }); + } else if let Some(imm) = UImm32Shifted::maybe_from_u64(hi) { + // 32-bit shifted immediate + insts.push(Inst::Mov64UImm32Shifted { rd, imm }); + } else { + unreachable!(); + } + + if let Some(imm) = UImm16Shifted::maybe_from_u64(lo) { + // 16-bit shifted immediate + insts.push(Inst::Insert64UImm16Shifted { rd, imm }); + } else if let Some(imm) = UImm32Shifted::maybe_from_u64(lo) { + // 32-bit shifted immediate + insts.push(Inst::Insert64UImm32Shifted { rd, imm }); + } else { + unreachable!(); + } + + insts + } + } + + /// Create an instruction that loads a 32-bit integer constant. + pub fn load_constant32(rd: Writable, value: u32) -> SmallVec<[Inst; 4]> { + if let Ok(imm) = i16::try_from(value as i32) { + // 16-bit signed immediate + smallvec![Inst::Mov32SImm16 { rd, imm }] + } else { + // 32-bit full immediate + smallvec![Inst::Mov32Imm { rd, imm: value }] + } + } + + /// Create an instruction that loads a 32-bit floating-point constant. + pub fn load_fp_constant32(rd: Writable, value: f32) -> Inst { + // TODO: use LZER to load 0.0 + Inst::LoadFpuConst32 { + rd, + const_data: value, + } + } + + /// Create an instruction that loads a 64-bit floating-point constant. + pub fn load_fp_constant64(rd: Writable, value: f64) -> Inst { + // TODO: use LZDR to load 0.0 + Inst::LoadFpuConst64 { + rd, + const_data: value, + } + } + + /// Generic constructor for a load (zero-extending where appropriate). + pub fn gen_load(into_reg: Writable, mem: MemArg, ty: Type) -> Inst { + match ty { + types::B1 | types::B8 | types::I8 => Inst::Load64ZExt8 { rd: into_reg, mem }, + types::B16 | types::I16 => Inst::Load64ZExt16 { rd: into_reg, mem }, + types::B32 | types::I32 => Inst::Load64ZExt32 { rd: into_reg, mem }, + types::B64 | types::I64 | types::R64 => Inst::Load64 { rd: into_reg, mem }, + types::F32 => Inst::FpuLoad32 { rd: into_reg, mem }, + types::F64 => Inst::FpuLoad64 { rd: into_reg, mem }, + _ => unimplemented!("gen_load({})", ty), + } + } + + /// Generic constructor for a store. 
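+ // Worked example for `load_constant64` above (illustrative): 0x1234 fits
+ // a signed 16-bit immediate and becomes `lghi`; 0x12345678 becomes `lgfi`;
+ // 0x0001_0000_0002_0000 has no single-instruction form and splits into a
+ // move of the high half plus an insert of the low half:
+ //
+ //     llihh 1 ; iilh 2
+ //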
+ pub fn gen_store(mem: MemArg, from_reg: Reg, ty: Type) -> Inst { + match ty { + types::B1 | types::B8 | types::I8 => Inst::Store8 { rd: from_reg, mem }, + types::B16 | types::I16 => Inst::Store16 { rd: from_reg, mem }, + types::B32 | types::I32 => Inst::Store32 { rd: from_reg, mem }, + types::B64 | types::I64 | types::R64 => Inst::Store64 { rd: from_reg, mem }, + types::F32 => Inst::FpuStore32 { rd: from_reg, mem }, + types::F64 => Inst::FpuStore64 { rd: from_reg, mem }, + _ => unimplemented!("gen_store({})", ty), + } + } +} + +//============================================================================= +// Instructions: get_regs + +fn memarg_regs(memarg: &MemArg, collector: &mut RegUsageCollector) { + match memarg { + &MemArg::BXD12 { base, index, .. } | &MemArg::BXD20 { base, index, .. } => { + if base != zero_reg() { + collector.add_use(base); + } + if index != zero_reg() { + collector.add_use(index); + } + } + &MemArg::Label { .. } | &MemArg::Symbol { .. } => {} + &MemArg::RegOffset { reg, .. } => { + collector.add_use(reg); + } + &MemArg::InitialSPOffset { .. } | &MemArg::NominalSPOffset { .. } => { + collector.add_use(stack_reg()); + } + } +} + +fn s390x_get_regs(inst: &Inst, collector: &mut RegUsageCollector) { + match inst { + &Inst::AluRRR { rd, rn, rm, .. } => { + collector.add_def(rd); + collector.add_use(rn); + collector.add_use(rm); + } + &Inst::AluRRSImm16 { rd, rn, .. } => { + collector.add_def(rd); + collector.add_use(rn); + } + &Inst::AluRR { rd, rm, .. } => { + collector.add_mod(rd); + collector.add_use(rm); + } + &Inst::AluRX { rd, ref mem, .. } => { + collector.add_mod(rd); + memarg_regs(mem, collector); + } + &Inst::AluRSImm16 { rd, .. } => { + collector.add_mod(rd); + } + &Inst::AluRSImm32 { rd, .. } => { + collector.add_mod(rd); + } + &Inst::AluRUImm32 { rd, .. } => { + collector.add_mod(rd); + } + &Inst::AluRUImm16Shifted { rd, .. } => { + collector.add_mod(rd); + } + &Inst::AluRUImm32Shifted { rd, .. } => { + collector.add_mod(rd); + } + &Inst::SMulWide { rn, rm, .. } => { + collector.add_def(writable_gpr(0)); + collector.add_def(writable_gpr(1)); + collector.add_use(rn); + collector.add_use(rm); + } + &Inst::UMulWide { rn, .. } => { + collector.add_def(writable_gpr(0)); + collector.add_mod(writable_gpr(1)); + collector.add_use(rn); + } + &Inst::SDivMod32 { rn, .. } | &Inst::SDivMod64 { rn, .. } => { + collector.add_def(writable_gpr(0)); + collector.add_mod(writable_gpr(1)); + collector.add_use(rn); + } + &Inst::UDivMod32 { rn, .. } | &Inst::UDivMod64 { rn, .. } => { + collector.add_mod(writable_gpr(0)); + collector.add_mod(writable_gpr(1)); + collector.add_use(rn); + } + &Inst::Flogr { rn, .. } => { + collector.add_def(writable_gpr(0)); + collector.add_def(writable_gpr(1)); + collector.add_use(rn); + } + &Inst::ShiftRR { + rd, rn, shift_reg, .. + } => { + collector.add_def(rd); + collector.add_use(rn); + if let Some(reg) = shift_reg { + collector.add_use(reg); + } + } + &Inst::UnaryRR { rd, rn, .. } => { + collector.add_def(rd); + collector.add_use(rn); + } + &Inst::CmpRR { rn, rm, .. } => { + collector.add_use(rn); + collector.add_use(rm); + } + &Inst::CmpRX { rn, ref mem, .. } => { + collector.add_use(rn); + memarg_regs(mem, collector); + } + &Inst::CmpRSImm16 { rn, .. } => { + collector.add_use(rn); + } + &Inst::CmpRSImm32 { rn, .. } => { + collector.add_use(rn); + } + &Inst::CmpRUImm32 { rn, .. } => { + collector.add_use(rn); + } + &Inst::CmpTrapRR { rn, rm, .. } => { + collector.add_use(rn); + collector.add_use(rm); + } + &Inst::CmpTrapRSImm16 { rn, .. 
} => { + collector.add_use(rn); + } + &Inst::CmpTrapRUImm16 { rn, .. } => { + collector.add_use(rn); + } + &Inst::Load32 { rd, ref mem, .. } + | &Inst::Load32ZExt8 { rd, ref mem, .. } + | &Inst::Load32SExt8 { rd, ref mem, .. } + | &Inst::Load32ZExt16 { rd, ref mem, .. } + | &Inst::Load32SExt16 { rd, ref mem, .. } + | &Inst::Load64 { rd, ref mem, .. } + | &Inst::Load64ZExt8 { rd, ref mem, .. } + | &Inst::Load64SExt8 { rd, ref mem, .. } + | &Inst::Load64ZExt16 { rd, ref mem, .. } + | &Inst::Load64SExt16 { rd, ref mem, .. } + | &Inst::Load64ZExt32 { rd, ref mem, .. } + | &Inst::Load64SExt32 { rd, ref mem, .. } + | &Inst::LoadRev16 { rd, ref mem, .. } + | &Inst::LoadRev32 { rd, ref mem, .. } + | &Inst::LoadRev64 { rd, ref mem, .. } => { + collector.add_def(rd); + memarg_regs(mem, collector); + } + &Inst::Store8 { rd, ref mem, .. } + | &Inst::Store16 { rd, ref mem, .. } + | &Inst::Store32 { rd, ref mem, .. } + | &Inst::Store64 { rd, ref mem, .. } + | &Inst::StoreRev16 { rd, ref mem, .. } + | &Inst::StoreRev32 { rd, ref mem, .. } + | &Inst::StoreRev64 { rd, ref mem, .. } => { + collector.add_use(rd); + memarg_regs(mem, collector); + } + &Inst::StoreImm8 { ref mem, .. } + | &Inst::StoreImm16 { ref mem, .. } + | &Inst::StoreImm32SExt16 { ref mem, .. } + | &Inst::StoreImm64SExt16 { ref mem, .. } => { + memarg_regs(mem, collector); + } + &Inst::LoadMultiple64 { + rt, rt2, addr_reg, .. + } => { + let first_regnum = rt.to_reg().get_hw_encoding(); + let last_regnum = rt2.to_reg().get_hw_encoding(); + for regnum in first_regnum..last_regnum + 1 { + collector.add_def(writable_gpr(regnum)); + } + collector.add_use(addr_reg); + } + &Inst::StoreMultiple64 { + rt, rt2, addr_reg, .. + } => { + let first_regnum = rt.get_hw_encoding(); + let last_regnum = rt2.get_hw_encoding(); + for regnum in first_regnum..last_regnum + 1 { + collector.add_use(gpr(regnum)); + } + collector.add_use(addr_reg); + } + &Inst::Mov64 { rd, rm } => { + collector.add_def(rd); + collector.add_use(rm); + } + &Inst::Mov32 { rd, rm } => { + collector.add_def(rd); + collector.add_use(rm); + } + &Inst::Mov32Imm { rd, .. } + | &Inst::Mov32SImm16 { rd, .. } + | &Inst::Mov64SImm16 { rd, .. } + | &Inst::Mov64SImm32 { rd, .. } + | &Inst::Mov64UImm16Shifted { rd, .. } + | &Inst::Mov64UImm32Shifted { rd, .. } => { + collector.add_def(rd); + } + &Inst::CMov32 { rd, rm, .. } | &Inst::CMov64 { rd, rm, .. } => { + collector.add_mod(rd); + collector.add_use(rm); + } + &Inst::CMov32SImm16 { rd, .. } | &Inst::CMov64SImm16 { rd, .. } => { + collector.add_mod(rd); + } + &Inst::Insert64UImm16Shifted { rd, .. } | &Inst::Insert64UImm32Shifted { rd, .. } => { + collector.add_mod(rd); + } + &Inst::FpuMove32 { rd, rn } | &Inst::FpuMove64 { rd, rn } => { + collector.add_def(rd); + collector.add_use(rn); + } + &Inst::FpuCMov32 { rd, rm, .. } | &Inst::FpuCMov64 { rd, rm, .. } => { + collector.add_mod(rd); + collector.add_use(rm); + } + &Inst::MovToFpr { rd, rn } | &Inst::MovFromFpr { rd, rn } => { + collector.add_def(rd); + collector.add_use(rn); + } + &Inst::FpuRR { rd, rn, .. } => { + collector.add_def(rd); + collector.add_use(rn); + } + &Inst::FpuRRR { rd, rm, .. } => { + collector.add_mod(rd); + collector.add_use(rm); + } + &Inst::FpuRRRR { rd, rn, rm, .. } => { + collector.add_mod(rd); + collector.add_use(rn); + collector.add_use(rm); + } + &Inst::FpuCopysign { rd, rn, rm, .. 
} => { + collector.add_def(rd); + collector.add_use(rn); + collector.add_use(rm); + } + &Inst::FpuCmp32 { rn, rm } | &Inst::FpuCmp64 { rn, rm } => { + collector.add_use(rn); + collector.add_use(rm); + } + &Inst::FpuLoad32 { rd, ref mem, .. } => { + collector.add_def(rd); + memarg_regs(mem, collector); + } + &Inst::FpuLoad64 { rd, ref mem, .. } => { + collector.add_def(rd); + memarg_regs(mem, collector); + } + &Inst::FpuStore32 { rd, ref mem, .. } => { + collector.add_use(rd); + memarg_regs(mem, collector); + } + &Inst::FpuStore64 { rd, ref mem, .. } => { + collector.add_use(rd); + memarg_regs(mem, collector); + } + &Inst::FpuLoadRev32 { rd, ref mem, .. } => { + collector.add_def(rd); + memarg_regs(mem, collector); + } + &Inst::FpuLoadRev64 { rd, ref mem, .. } => { + collector.add_def(rd); + memarg_regs(mem, collector); + } + &Inst::FpuStoreRev32 { rd, ref mem, .. } => { + collector.add_use(rd); + memarg_regs(mem, collector); + } + &Inst::FpuStoreRev64 { rd, ref mem, .. } => { + collector.add_use(rd); + memarg_regs(mem, collector); + } + &Inst::LoadFpuConst32 { rd, .. } | &Inst::LoadFpuConst64 { rd, .. } => { + collector.add_def(rd); + } + &Inst::FpuToInt { rd, rn, .. } => { + collector.add_def(rd); + collector.add_use(rn); + } + &Inst::IntToFpu { rd, rn, .. } => { + collector.add_def(rd); + collector.add_use(rn); + } + &Inst::FpuRound { rd, rn, .. } => { + collector.add_def(rd); + collector.add_use(rn); + } + &Inst::FpuVecRRR { rd, rn, rm, .. } => { + collector.add_def(rd); + collector.add_use(rn); + collector.add_use(rm); + } + &Inst::Extend { rd, rn, .. } => { + collector.add_def(rd); + collector.add_use(rn); + } + &Inst::Call { link, ref info } => { + collector.add_def(link); + collector.add_uses(&*info.uses); + collector.add_defs(&*info.defs); + } + &Inst::CallInd { link, ref info } => { + collector.add_def(link); + collector.add_uses(&*info.uses); + collector.add_defs(&*info.defs); + collector.add_use(info.rn); + } + &Inst::Ret { .. } => {} + &Inst::Jump { .. } | &Inst::EpiloguePlaceholder => {} + &Inst::IndirectBr { rn, .. } => { + collector.add_use(rn); + } + &Inst::CondBr { .. } | &Inst::OneWayCondBr { .. } => {} + &Inst::Nop0 | Inst::Nop2 => {} + &Inst::Debugtrap => {} + &Inst::Trap { .. } => {} + &Inst::TrapIf { .. } => {} + &Inst::JTSequence { + ridx, rtmp1, rtmp2, .. + } => { + collector.add_use(ridx); + collector.add_def(rtmp1); + collector.add_def(rtmp2); + } + &Inst::LoadExtNameFar { rd, .. } => { + collector.add_def(rd); + } + &Inst::LoadAddr { rd, ref mem } => { + collector.add_def(rd); + memarg_regs(mem, collector); + } + &Inst::VirtualSPOffsetAdj { .. } => {} + &Inst::ValueLabelMarker { reg, .. } => { + collector.add_use(reg); + } + &Inst::Unwind { .. } => {} + } +} + +//============================================================================= +// Instructions: map_regs + +fn s390x_map_regs(inst: &mut Inst, mapper: &RUM) { + fn map_use(m: &RUM, r: &mut Reg) { + if r.is_virtual() { + let new = m.get_use(r.to_virtual_reg()).unwrap().to_reg(); + *r = new; + } + } + + fn map_def(m: &RUM, r: &mut Writable) { + if r.to_reg().is_virtual() { + let new = m.get_def(r.to_reg().to_virtual_reg()).unwrap().to_reg(); + *r = Writable::from_reg(new); + } + } + + fn map_mod(m: &RUM, r: &mut Writable) { + if r.to_reg().is_virtual() { + let new = m.get_mod(r.to_reg().to_virtual_reg()).unwrap().to_reg(); + *r = Writable::from_reg(new); + } + } + + fn map_mem(m: &RUM, mem: &mut MemArg) { + match mem { + &mut MemArg::BXD12 { + ref mut base, + ref mut index, + .. 
+ } + | &mut MemArg::BXD20 { + ref mut base, + ref mut index, + .. + } => { + if *base != zero_reg() { + map_use(m, base); + } + if *index != zero_reg() { + map_use(m, index); + } + } + &mut MemArg::Label { .. } | &mut MemArg::Symbol { .. } => {} + &mut MemArg::RegOffset { ref mut reg, .. } => map_use(m, reg), + &mut MemArg::InitialSPOffset { .. } | &mut MemArg::NominalSPOffset { .. } => {} + }; + } + + match inst { + &mut Inst::AluRRR { + ref mut rd, + ref mut rn, + ref mut rm, + .. + } => { + map_def(mapper, rd); + map_use(mapper, rn); + map_use(mapper, rm); + } + &mut Inst::AluRRSImm16 { + ref mut rd, + ref mut rn, + .. + } => { + map_def(mapper, rd); + map_use(mapper, rn); + } + &mut Inst::AluRX { + ref mut rd, + ref mut mem, + .. + } => { + map_mod(mapper, rd); + map_mem(mapper, mem); + } + &mut Inst::AluRR { + ref mut rd, + ref mut rm, + .. + } => { + map_mod(mapper, rd); + map_use(mapper, rm); + } + &mut Inst::AluRSImm16 { ref mut rd, .. } => { + map_mod(mapper, rd); + } + &mut Inst::AluRSImm32 { ref mut rd, .. } => { + map_mod(mapper, rd); + } + &mut Inst::AluRUImm32 { ref mut rd, .. } => { + map_mod(mapper, rd); + } + &mut Inst::AluRUImm16Shifted { ref mut rd, .. } => { + map_mod(mapper, rd); + } + &mut Inst::AluRUImm32Shifted { ref mut rd, .. } => { + map_mod(mapper, rd); + } + &mut Inst::SMulWide { + ref mut rn, + ref mut rm, + .. + } => { + map_use(mapper, rn); + map_use(mapper, rm); + } + &mut Inst::UMulWide { ref mut rn, .. } => { + map_use(mapper, rn); + } + &mut Inst::SDivMod32 { ref mut rn, .. } => { + map_use(mapper, rn); + } + &mut Inst::SDivMod64 { ref mut rn, .. } => { + map_use(mapper, rn); + } + &mut Inst::UDivMod32 { ref mut rn, .. } => { + map_use(mapper, rn); + } + &mut Inst::UDivMod64 { ref mut rn, .. } => { + map_use(mapper, rn); + } + &mut Inst::Flogr { ref mut rn, .. } => { + map_use(mapper, rn); + } + &mut Inst::ShiftRR { + ref mut rd, + ref mut rn, + ref mut shift_reg, + .. + } => { + map_def(mapper, rd); + map_use(mapper, rn); + if let Some(reg) = shift_reg { + map_use(mapper, reg); + } + } + &mut Inst::UnaryRR { + ref mut rd, + ref mut rn, + .. + } => { + map_def(mapper, rd); + map_use(mapper, rn); + } + &mut Inst::CmpRR { + ref mut rn, + ref mut rm, + .. + } => { + map_use(mapper, rn); + map_use(mapper, rm); + } + &mut Inst::CmpRX { + ref mut rn, + ref mut mem, + .. + } => { + map_use(mapper, rn); + map_mem(mapper, mem); + } + &mut Inst::CmpRSImm16 { ref mut rn, .. } => { + map_use(mapper, rn); + } + &mut Inst::CmpRSImm32 { ref mut rn, .. } => { + map_use(mapper, rn); + } + &mut Inst::CmpRUImm32 { ref mut rn, .. } => { + map_use(mapper, rn); + } + &mut Inst::CmpTrapRR { + ref mut rn, + ref mut rm, + .. + } => { + map_use(mapper, rn); + map_use(mapper, rm); + } + &mut Inst::CmpTrapRSImm16 { ref mut rn, .. } => { + map_use(mapper, rn); + } + &mut Inst::CmpTrapRUImm16 { ref mut rn, .. } => { + map_use(mapper, rn); + } + + &mut Inst::Load32 { + ref mut rd, + ref mut mem, + .. + } => { + map_def(mapper, rd); + map_mem(mapper, mem); + } + &mut Inst::Load32ZExt8 { + ref mut rd, + ref mut mem, + .. + } => { + map_def(mapper, rd); + map_mem(mapper, mem); + } + &mut Inst::Load32SExt8 { + ref mut rd, + ref mut mem, + .. + } => { + map_def(mapper, rd); + map_mem(mapper, mem); + } + &mut Inst::Load32ZExt16 { + ref mut rd, + ref mut mem, + .. + } => { + map_def(mapper, rd); + map_mem(mapper, mem); + } + &mut Inst::Load32SExt16 { + ref mut rd, + ref mut mem, + .. 
+ } => { + map_def(mapper, rd); + map_mem(mapper, mem); + } + &mut Inst::Load64 { + ref mut rd, + ref mut mem, + .. + } => { + map_def(mapper, rd); + map_mem(mapper, mem); + } + &mut Inst::Load64ZExt8 { + ref mut rd, + ref mut mem, + .. + } => { + map_def(mapper, rd); + map_mem(mapper, mem); + } + &mut Inst::Load64SExt8 { + ref mut rd, + ref mut mem, + .. + } => { + map_def(mapper, rd); + map_mem(mapper, mem); + } + &mut Inst::Load64ZExt16 { + ref mut rd, + ref mut mem, + .. + } => { + map_def(mapper, rd); + map_mem(mapper, mem); + } + &mut Inst::Load64SExt16 { + ref mut rd, + ref mut mem, + .. + } => { + map_def(mapper, rd); + map_mem(mapper, mem); + } + &mut Inst::Load64ZExt32 { + ref mut rd, + ref mut mem, + .. + } => { + map_def(mapper, rd); + map_mem(mapper, mem); + } + &mut Inst::Load64SExt32 { + ref mut rd, + ref mut mem, + .. + } => { + map_def(mapper, rd); + map_mem(mapper, mem); + } + &mut Inst::LoadRev16 { + ref mut rd, + ref mut mem, + .. + } => { + map_def(mapper, rd); + map_mem(mapper, mem); + } + &mut Inst::LoadRev32 { + ref mut rd, + ref mut mem, + .. + } => { + map_def(mapper, rd); + map_mem(mapper, mem); + } + &mut Inst::LoadRev64 { + ref mut rd, + ref mut mem, + .. + } => { + map_def(mapper, rd); + map_mem(mapper, mem); + } + + &mut Inst::Store8 { + ref mut rd, + ref mut mem, + .. + } => { + map_use(mapper, rd); + map_mem(mapper, mem); + } + &mut Inst::Store16 { + ref mut rd, + ref mut mem, + .. + } => { + map_use(mapper, rd); + map_mem(mapper, mem); + } + &mut Inst::Store32 { + ref mut rd, + ref mut mem, + .. + } => { + map_use(mapper, rd); + map_mem(mapper, mem); + } + &mut Inst::Store64 { + ref mut rd, + ref mut mem, + .. + } => { + map_use(mapper, rd); + map_mem(mapper, mem); + } + &mut Inst::StoreImm8 { ref mut mem, .. } => { + map_mem(mapper, mem); + } + &mut Inst::StoreImm16 { ref mut mem, .. } => { + map_mem(mapper, mem); + } + &mut Inst::StoreImm32SExt16 { ref mut mem, .. } => { + map_mem(mapper, mem); + } + &mut Inst::StoreImm64SExt16 { ref mut mem, .. } => { + map_mem(mapper, mem); + } + &mut Inst::StoreRev16 { + ref mut rd, + ref mut mem, + .. + } => { + map_use(mapper, rd); + map_mem(mapper, mem); + } + &mut Inst::StoreRev32 { + ref mut rd, + ref mut mem, + .. + } => { + map_use(mapper, rd); + map_mem(mapper, mem); + } + &mut Inst::StoreRev64 { + ref mut rd, + ref mut mem, + .. + } => { + map_use(mapper, rd); + map_mem(mapper, mem); + } + &mut Inst::LoadMultiple64 { .. } => { + // This instruction accesses all registers between rt and rt2, + // so it cannot be remapped. But this does not matter since + // the instruction is only ever used after register allocation. + unreachable!(); + } + &mut Inst::StoreMultiple64 { .. } => { + // This instruction accesses all registers between rt and rt2, + // so it cannot be remapped. But this does not matter since + // the instruction is only ever used after register allocation. + unreachable!(); + } + + &mut Inst::Mov64 { + ref mut rd, + ref mut rm, + } => { + map_def(mapper, rd); + map_use(mapper, rm); + } + &mut Inst::Mov32 { + ref mut rd, + ref mut rm, + } => { + map_def(mapper, rd); + map_use(mapper, rm); + } + &mut Inst::Mov32Imm { ref mut rd, .. } => { + map_def(mapper, rd); + } + &mut Inst::Mov32SImm16 { ref mut rd, .. } => { + map_def(mapper, rd); + } + &mut Inst::Mov64SImm16 { ref mut rd, .. } => { + map_def(mapper, rd); + } + &mut Inst::Mov64SImm32 { ref mut rd, .. } => { + map_def(mapper, rd); + } + &mut Inst::Mov64UImm16Shifted { ref mut rd, .. 
} => { + map_def(mapper, rd); + } + &mut Inst::Mov64UImm32Shifted { ref mut rd, .. } => { + map_def(mapper, rd); + } + &mut Inst::Insert64UImm16Shifted { ref mut rd, .. } => { + map_mod(mapper, rd); + } + &mut Inst::Insert64UImm32Shifted { ref mut rd, .. } => { + map_mod(mapper, rd); + } + &mut Inst::CMov64 { + ref mut rd, + ref mut rm, + .. + } => { + map_mod(mapper, rd); + map_use(mapper, rm); + } + &mut Inst::CMov32 { + ref mut rd, + ref mut rm, + .. + } => { + map_mod(mapper, rd); + map_use(mapper, rm); + } + &mut Inst::CMov32SImm16 { ref mut rd, .. } => { + map_mod(mapper, rd); + } + &mut Inst::CMov64SImm16 { ref mut rd, .. } => { + map_mod(mapper, rd); + } + &mut Inst::FpuMove32 { + ref mut rd, + ref mut rn, + } => { + map_def(mapper, rd); + map_use(mapper, rn); + } + &mut Inst::FpuMove64 { + ref mut rd, + ref mut rn, + } => { + map_def(mapper, rd); + map_use(mapper, rn); + } + &mut Inst::FpuCMov64 { + ref mut rd, + ref mut rm, + .. + } => { + map_mod(mapper, rd); + map_use(mapper, rm); + } + &mut Inst::FpuCMov32 { + ref mut rd, + ref mut rm, + .. + } => { + map_mod(mapper, rd); + map_use(mapper, rm); + } + &mut Inst::MovToFpr { + ref mut rd, + ref mut rn, + } => { + map_def(mapper, rd); + map_use(mapper, rn); + } + &mut Inst::MovFromFpr { + ref mut rd, + ref mut rn, + } => { + map_def(mapper, rd); + map_use(mapper, rn); + } + &mut Inst::FpuRR { + ref mut rd, + ref mut rn, + .. + } => { + map_def(mapper, rd); + map_use(mapper, rn); + } + &mut Inst::FpuRRR { + ref mut rd, + ref mut rm, + .. + } => { + map_mod(mapper, rd); + map_use(mapper, rm); + } + &mut Inst::FpuRRRR { + ref mut rd, + ref mut rn, + ref mut rm, + .. + } => { + map_mod(mapper, rd); + map_use(mapper, rn); + map_use(mapper, rm); + } + &mut Inst::FpuCopysign { + ref mut rd, + ref mut rn, + ref mut rm, + .. + } => { + map_def(mapper, rd); + map_use(mapper, rn); + map_use(mapper, rm); + } + &mut Inst::FpuCmp32 { + ref mut rn, + ref mut rm, + } => { + map_use(mapper, rn); + map_use(mapper, rm); + } + &mut Inst::FpuCmp64 { + ref mut rn, + ref mut rm, + } => { + map_use(mapper, rn); + map_use(mapper, rm); + } + &mut Inst::FpuLoad32 { + ref mut rd, + ref mut mem, + .. + } => { + map_def(mapper, rd); + map_mem(mapper, mem); + } + &mut Inst::FpuLoad64 { + ref mut rd, + ref mut mem, + .. + } => { + map_def(mapper, rd); + map_mem(mapper, mem); + } + &mut Inst::FpuStore32 { + ref mut rd, + ref mut mem, + .. + } => { + map_use(mapper, rd); + map_mem(mapper, mem); + } + &mut Inst::FpuStore64 { + ref mut rd, + ref mut mem, + .. + } => { + map_use(mapper, rd); + map_mem(mapper, mem); + } + &mut Inst::FpuLoadRev32 { + ref mut rd, + ref mut mem, + .. + } => { + map_def(mapper, rd); + map_mem(mapper, mem); + } + &mut Inst::FpuLoadRev64 { + ref mut rd, + ref mut mem, + .. + } => { + map_def(mapper, rd); + map_mem(mapper, mem); + } + &mut Inst::FpuStoreRev32 { + ref mut rd, + ref mut mem, + .. + } => { + map_use(mapper, rd); + map_mem(mapper, mem); + } + &mut Inst::FpuStoreRev64 { + ref mut rd, + ref mut mem, + .. + } => { + map_use(mapper, rd); + map_mem(mapper, mem); + } + &mut Inst::LoadFpuConst32 { ref mut rd, .. } => { + map_def(mapper, rd); + } + &mut Inst::LoadFpuConst64 { ref mut rd, .. } => { + map_def(mapper, rd); + } + &mut Inst::FpuToInt { + ref mut rd, + ref mut rn, + .. + } => { + map_def(mapper, rd); + map_use(mapper, rn); + } + &mut Inst::IntToFpu { + ref mut rd, + ref mut rn, + .. + } => { + map_def(mapper, rd); + map_use(mapper, rn); + } + &mut Inst::FpuRound { + ref mut rd, + ref mut rn, + .. 
+ } => { + map_def(mapper, rd); + map_use(mapper, rn); + } + &mut Inst::FpuVecRRR { + ref mut rd, + ref mut rn, + ref mut rm, + .. + } => { + map_def(mapper, rd); + map_use(mapper, rn); + map_use(mapper, rm); + } + &mut Inst::Extend { + ref mut rd, + ref mut rn, + .. + } => { + map_def(mapper, rd); + map_use(mapper, rn); + } + &mut Inst::Call { + ref mut link, + ref mut info, + } => { + map_def(mapper, link); + for r in info.uses.iter_mut() { + map_use(mapper, r); + } + for r in info.defs.iter_mut() { + map_def(mapper, r); + } + } + &mut Inst::CallInd { + ref mut link, + ref mut info, + .. + } => { + map_def(mapper, link); + for r in info.uses.iter_mut() { + map_use(mapper, r); + } + for r in info.defs.iter_mut() { + map_def(mapper, r); + } + map_use(mapper, &mut info.rn); + } + &mut Inst::Ret { .. } => {} + &mut Inst::EpiloguePlaceholder => {} + &mut Inst::Jump { .. } => {} + &mut Inst::IndirectBr { ref mut rn, .. } => { + map_use(mapper, rn); + } + &mut Inst::CondBr { .. } | &mut Inst::OneWayCondBr { .. } => {} + &mut Inst::Debugtrap | &mut Inst::Trap { .. } | &mut Inst::TrapIf { .. } => {} + &mut Inst::Nop0 | &mut Inst::Nop2 => {} + &mut Inst::JTSequence { + ref mut ridx, + ref mut rtmp1, + ref mut rtmp2, + .. + } => { + map_use(mapper, ridx); + map_def(mapper, rtmp1); + map_def(mapper, rtmp2); + } + &mut Inst::LoadExtNameFar { ref mut rd, .. } => { + map_def(mapper, rd); + } + &mut Inst::LoadAddr { + ref mut rd, + ref mut mem, + } => { + map_def(mapper, rd); + map_mem(mapper, mem); + } + &mut Inst::VirtualSPOffsetAdj { .. } => {} + &mut Inst::ValueLabelMarker { ref mut reg, .. } => { + map_use(mapper, reg); + } + &mut Inst::Unwind { .. } => {} + } +} + +//============================================================================= +// Instructions: misc functions and external interface + +impl MachInst for Inst { + type LabelUse = LabelUse; + + fn get_regs(&self, collector: &mut RegUsageCollector) { + s390x_get_regs(self, collector) + } + + fn map_regs(&mut self, mapper: &RUM) { + s390x_map_regs(self, mapper); + } + + fn is_move(&self) -> Option<(Writable, Reg)> { + match self { + &Inst::Mov32 { rd, rm } => Some((rd, rm)), + &Inst::Mov64 { rd, rm } => Some((rd, rm)), + &Inst::FpuMove32 { rd, rn } => Some((rd, rn)), + &Inst::FpuMove64 { rd, rn } => Some((rd, rn)), + _ => None, + } + } + + fn is_epilogue_placeholder(&self) -> bool { + if let Inst::EpiloguePlaceholder = self { + true + } else { + false + } + } + + fn is_term<'a>(&'a self) -> MachTerminator<'a> { + match self { + &Inst::Ret { .. } | &Inst::EpiloguePlaceholder => MachTerminator::Ret, + &Inst::Jump { dest } => MachTerminator::Uncond(dest.as_label().unwrap()), + &Inst::CondBr { + taken, not_taken, .. + } => MachTerminator::Cond(taken.as_label().unwrap(), not_taken.as_label().unwrap()), + &Inst::OneWayCondBr { .. } => { + // Explicitly invisible to CFG processing. + MachTerminator::None + } + &Inst::IndirectBr { ref targets, .. } => MachTerminator::Indirect(&targets[..]), + &Inst::JTSequence { ref info, .. 
} => {
+ MachTerminator::Indirect(&info.targets_for_term[..])
+ }
+ _ => MachTerminator::None,
+ }
+ }
+
+ fn stack_op_info(&self) -> Option<MachInstStackOpInfo> {
+ match self {
+ &Inst::VirtualSPOffsetAdj { offset } => Some(MachInstStackOpInfo::NomSPAdj(offset)),
+ &Inst::Store64 {
+ rd,
+ mem: MemArg::NominalSPOffset { off },
+ } => Some(MachInstStackOpInfo::StoreNomSPOff(rd, off)),
+ &Inst::Load64 {
+ rd,
+ mem: MemArg::NominalSPOffset { off },
+ } => Some(MachInstStackOpInfo::LoadNomSPOff(rd.to_reg(), off)),
+ _ => None,
+ }
+ }
+
+ fn gen_move(to_reg: Writable<Reg>, from_reg: Reg, ty: Type) -> Inst {
+ assert!(ty.bits() <= 64);
+ if ty.bits() <= 32 {
+ Inst::mov32(to_reg, from_reg)
+ } else {
+ Inst::mov64(to_reg, from_reg)
+ }
+ }
+
+ fn gen_constant<F: FnMut(Type) -> Writable<Reg>>(
+ to_regs: ValueRegs<Writable<Reg>>,
+ value: u128,
+ ty: Type,
+ _alloc_tmp: F,
+ ) -> SmallVec<[Inst; 4]> {
+ let to_reg = to_regs
+ .only_reg()
+ .expect("multi-reg values not supported yet");
+ let value = value as u64;
+ match ty {
+ types::F64 => {
+ let mut ret = SmallVec::new();
+ ret.push(Inst::load_fp_constant64(to_reg, f64::from_bits(value)));
+ ret
+ }
+ types::F32 => {
+ let mut ret = SmallVec::new();
+ ret.push(Inst::load_fp_constant32(
+ to_reg,
+ f32::from_bits(value as u32),
+ ));
+ ret
+ }
+ types::I64 | types::B64 | types::R64 => Inst::load_constant64(to_reg, value),
+ types::B1
+ | types::I8
+ | types::B8
+ | types::I16
+ | types::B16
+ | types::I32
+ | types::B32 => Inst::load_constant32(to_reg, value as u32),
+ _ => unreachable!(),
+ }
+ }
+
+ fn gen_nop(preferred_size: usize) -> Inst {
+ if preferred_size == 0 {
+ Inst::Nop0
+ } else {
+ // We can't give a NOP (or any insn) < 2 bytes.
+ assert!(preferred_size >= 2);
+ Inst::Nop2
+ }
+ }
+
+ fn maybe_direct_reload(&self, _reg: VirtualReg, _slot: SpillSlot) -> Option<Inst> {
+ None
+ }
+
+ fn rc_for_type(ty: Type) -> CodegenResult<(&'static [RegClass], &'static [Type])> {
+ match ty {
+ types::I8 => Ok((&[RegClass::I64], &[types::I8])),
+ types::I16 => Ok((&[RegClass::I64], &[types::I16])),
+ types::I32 => Ok((&[RegClass::I64], &[types::I32])),
+ types::I64 => Ok((&[RegClass::I64], &[types::I64])),
+ types::B1 => Ok((&[RegClass::I64], &[types::B1])),
+ types::B8 => Ok((&[RegClass::I64], &[types::B8])),
+ types::B16 => Ok((&[RegClass::I64], &[types::B16])),
+ types::B32 => Ok((&[RegClass::I64], &[types::B32])),
+ types::B64 => Ok((&[RegClass::I64], &[types::B64])),
+ types::R32 => panic!("32-bit reftype pointer should never be seen on s390x"),
+ types::R64 => Ok((&[RegClass::I64], &[types::R64])),
+ types::F32 => Ok((&[RegClass::F64], &[types::F32])),
+ types::F64 => Ok((&[RegClass::F64], &[types::F64])),
+ types::I128 => Ok((&[RegClass::I64, RegClass::I64], &[types::I64, types::I64])),
+ types::B128 => Ok((&[RegClass::I64, RegClass::I64], &[types::B64, types::B64])),
+ // FIXME: We don't really have IFLAGS, but need to allow it here
+ // for now to support the SelectifSpectreGuard instruction.
+ types::IFLAGS => Ok((&[RegClass::I64], &[types::I64])),
+ _ => Err(CodegenError::Unsupported(format!(
+ "Unexpected SSA-value type: {}",
+ ty
+ ))),
+ }
+ }
+
+ fn gen_jump(target: MachLabel) -> Inst {
+ Inst::Jump {
+ dest: BranchTarget::Label(target),
+ }
+ }
+
+ fn reg_universe(flags: &settings::Flags) -> RealRegUniverse {
+ create_reg_universe(flags)
+ }
+
+ fn worst_case_size() -> CodeOffset {
+ // The maximum size, in bytes, of any `Inst`'s emitted code. We have at least one case of
+ // an 8-instruction sequence (saturating int-to-float conversions) with three embedded
+ // 64-bit f64 constants.
+ // + // Note that inline jump-tables handle island/pool insertion separately, so we do not need + // to account for them here (otherwise the worst case would be 2^31 * 4, clearly not + // feasible for other reasons). + 44 + } + + fn ref_type_regclass(_: &settings::Flags) -> RegClass { + RegClass::I64 + } + + fn gen_value_label_marker(label: ValueLabel, reg: Reg) -> Self { + Inst::ValueLabelMarker { label, reg } + } + + fn defines_value_label(&self) -> Option<(ValueLabel, Reg)> { + match self { + Inst::ValueLabelMarker { label, reg } => Some((*label, *reg)), + _ => None, + } + } +} + +//============================================================================= +// Pretty-printing of instructions. + +fn mem_finalize_for_show( + mem: &MemArg, + mb_rru: Option<&RealRegUniverse>, + state: &EmitState, + have_d12: bool, + have_d20: bool, + have_pcrel: bool, + have_index: bool, +) -> (String, MemArg) { + let (mem_insts, mem) = mem_finalize(mem, state, have_d12, have_d20, have_pcrel, have_index); + let mut mem_str = mem_insts + .into_iter() + .map(|inst| inst.show_rru(mb_rru)) + .collect::>() + .join(" ; "); + if !mem_str.is_empty() { + mem_str += " ; "; + } + + (mem_str, mem) +} + +impl PrettyPrint for Inst { + fn show_rru(&self, mb_rru: Option<&RealRegUniverse>) -> String { + self.pretty_print(mb_rru, &mut EmitState::default()) + } +} + +impl Inst { + fn print_with_state(&self, mb_rru: Option<&RealRegUniverse>, state: &mut EmitState) -> String { + match self { + &Inst::Nop0 => "nop-zero-len".to_string(), + &Inst::Nop2 => "nop".to_string(), + &Inst::AluRRR { alu_op, rd, rn, rm } => { + let (op, have_rr) = match alu_op { + ALUOp::Add32 => ("ark", true), + ALUOp::Add64 => ("agrk", true), + ALUOp::Sub32 => ("srk", true), + ALUOp::Sub64 => ("sgrk", true), + ALUOp::Mul32 => ("msrkc", true), + ALUOp::Mul64 => ("msgrkc", true), + ALUOp::And32 => ("nrk", true), + ALUOp::And64 => ("ngrk", true), + ALUOp::Orr32 => ("ork", true), + ALUOp::Orr64 => ("ogrk", true), + ALUOp::Xor32 => ("xrk", true), + ALUOp::Xor64 => ("xgrk", true), + ALUOp::AndNot32 => ("nnrk", false), + ALUOp::AndNot64 => ("nngrk", false), + ALUOp::OrrNot32 => ("nork", false), + ALUOp::OrrNot64 => ("nogrk", false), + ALUOp::XorNot32 => ("nxrk", false), + ALUOp::XorNot64 => ("nxgrk", false), + _ => unreachable!(), + }; + if have_rr && rd.to_reg() == rn { + let inst = Inst::AluRR { alu_op, rd, rm }; + return inst.print_with_state(mb_rru, state); + } + let rd = rd.to_reg().show_rru(mb_rru); + let rn = rn.show_rru(mb_rru); + let rm = rm.show_rru(mb_rru); + format!("{} {}, {}, {}", op, rd, rn, rm) + } + &Inst::AluRRSImm16 { + alu_op, + rd, + rn, + imm, + } => { + if rd.to_reg() == rn { + let inst = Inst::AluRSImm16 { alu_op, rd, imm }; + return inst.print_with_state(mb_rru, state); + } + let op = match alu_op { + ALUOp::Add32 => "ahik", + ALUOp::Add64 => "aghik", + _ => unreachable!(), + }; + let rd = rd.to_reg().show_rru(mb_rru); + let rn = rn.show_rru(mb_rru); + format!("{} {}, {}, {}", op, rd, rn, imm) + } + &Inst::AluRR { alu_op, rd, rm } => { + let op = match alu_op { + ALUOp::Add32 => "ar", + ALUOp::Add64 => "agr", + ALUOp::Add64Ext32 => "agfr", + ALUOp::Sub32 => "sr", + ALUOp::Sub64 => "sgr", + ALUOp::Sub64Ext32 => "sgfr", + ALUOp::Mul32 => "msr", + ALUOp::Mul64 => "msgr", + ALUOp::Mul64Ext32 => "msgfr", + ALUOp::And32 => "nr", + ALUOp::And64 => "ngr", + ALUOp::Orr32 => "or", + ALUOp::Orr64 => "ogr", + ALUOp::Xor32 => "xr", + ALUOp::Xor64 => "xgr", + _ => unreachable!(), + }; + let rd = rd.to_reg().show_rru(mb_rru); + let rm = 
rm.show_rru(mb_rru); + format!("{} {}, {}", op, rd, rm) + } + &Inst::AluRX { + alu_op, + rd, + ref mem, + } => { + let (opcode_rx, opcode_rxy) = match alu_op { + ALUOp::Add32 => (Some("a"), Some("ay")), + ALUOp::Add32Ext16 => (Some("ah"), Some("ahy")), + ALUOp::Add64 => (None, Some("ag")), + ALUOp::Add64Ext16 => (None, Some("agh")), + ALUOp::Add64Ext32 => (None, Some("agf")), + ALUOp::Sub32 => (Some("s"), Some("sy")), + ALUOp::Sub32Ext16 => (Some("sh"), Some("shy")), + ALUOp::Sub64 => (None, Some("sg")), + ALUOp::Sub64Ext16 => (None, Some("sgh")), + ALUOp::Sub64Ext32 => (None, Some("sgf")), + ALUOp::Mul32 => (Some("ms"), Some("msy")), + ALUOp::Mul32Ext16 => (Some("mh"), Some("mhy")), + ALUOp::Mul64 => (None, Some("msg")), + ALUOp::Mul64Ext16 => (None, Some("mgh")), + ALUOp::Mul64Ext32 => (None, Some("msgf")), + ALUOp::And32 => (Some("n"), Some("ny")), + ALUOp::And64 => (None, Some("ng")), + ALUOp::Orr32 => (Some("o"), Some("oy")), + ALUOp::Orr64 => (None, Some("og")), + ALUOp::Xor32 => (Some("x"), Some("xy")), + ALUOp::Xor64 => (None, Some("xg")), + _ => unreachable!(), + }; + + let (mem_str, mem) = mem_finalize_for_show( + mem, + mb_rru, + state, + opcode_rx.is_some(), + opcode_rxy.is_some(), + false, + true, + ); + + let op = match &mem { + &MemArg::BXD12 { .. } => opcode_rx, + &MemArg::BXD20 { .. } => opcode_rxy, + _ => unreachable!(), + }; + + let rd = rd.to_reg().show_rru(mb_rru); + let mem = mem.show_rru(mb_rru); + format!("{}{} {}, {}", mem_str, op.unwrap(), rd, mem) + } + &Inst::AluRSImm16 { alu_op, rd, imm } => { + let op = match alu_op { + ALUOp::Add32 => "ahi", + ALUOp::Add64 => "aghi", + ALUOp::Mul32 => "mhi", + ALUOp::Mul64 => "mghi", + _ => unreachable!(), + }; + let rd = rd.to_reg().show_rru(mb_rru); + format!("{} {}, {}", op, rd, imm) + } + &Inst::AluRSImm32 { alu_op, rd, imm } => { + let op = match alu_op { + ALUOp::Add32 => "afi", + ALUOp::Add64 => "agfi", + ALUOp::Mul32 => "msfi", + ALUOp::Mul64 => "msgfi", + _ => unreachable!(), + }; + let rd = rd.to_reg().show_rru(mb_rru); + format!("{} {}, {}", op, rd, imm) + } + &Inst::AluRUImm32 { alu_op, rd, imm } => { + let op = match alu_op { + ALUOp::Add32 => "alfi", + ALUOp::Add64 => "algfi", + ALUOp::Sub32 => "slfi", + ALUOp::Sub64 => "slgfi", + _ => unreachable!(), + }; + let rd = rd.to_reg().show_rru(mb_rru); + format!("{} {}, {}", op, rd, imm) + } + &Inst::AluRUImm16Shifted { alu_op, rd, imm } => { + let op = match (alu_op, imm.shift) { + (ALUOp::And32, 0) => "nill", + (ALUOp::And32, 1) => "nilh", + (ALUOp::And64, 0) => "nill", + (ALUOp::And64, 1) => "nilh", + (ALUOp::And64, 2) => "nihl", + (ALUOp::And64, 3) => "nihh", + (ALUOp::Orr32, 0) => "oill", + (ALUOp::Orr32, 1) => "oilh", + (ALUOp::Orr64, 0) => "oill", + (ALUOp::Orr64, 1) => "oilh", + (ALUOp::Orr64, 2) => "oihl", + (ALUOp::Orr64, 3) => "oihh", + _ => unreachable!(), + }; + let rd = rd.to_reg().show_rru(mb_rru); + format!("{} {}, {}", op, rd, imm.bits) + } + &Inst::AluRUImm32Shifted { alu_op, rd, imm } => { + let op = match (alu_op, imm.shift) { + (ALUOp::And32, 0) => "nilf", + (ALUOp::And64, 0) => "nilf", + (ALUOp::And64, 1) => "nihf", + (ALUOp::Orr32, 0) => "oilf", + (ALUOp::Orr64, 0) => "oilf", + (ALUOp::Orr64, 1) => "oihf", + (ALUOp::Xor32, 0) => "xilf", + (ALUOp::Xor64, 0) => "xilf", + (ALUOp::Xor64, 1) => "xihf", + _ => unreachable!(), + }; + let rd = rd.to_reg().show_rru(mb_rru); + format!("{} {}, {}", op, rd, imm.bits) + } + &Inst::SMulWide { rn, rm } => { + let op = "mgrk"; + let rd = gpr(0).show_rru(mb_rru); + let rn = rn.show_rru(mb_rru); + let rm = 
rm.show_rru(mb_rru); + format!("{} {}, {}, {}", op, rd, rn, rm) + } + &Inst::UMulWide { rn } => { + let op = "mlgr"; + let rd = gpr(0).show_rru(mb_rru); + let rn = rn.show_rru(mb_rru); + format!("{} {}, {}", op, rd, rn) + } + &Inst::SDivMod32 { rn, .. } => { + let op = "dsgfr"; + let rd = gpr(0).show_rru(mb_rru); + let rn = rn.show_rru(mb_rru); + format!("{} {}, {}", op, rd, rn) + } + &Inst::SDivMod64 { rn, .. } => { + let op = "dsgr"; + let rd = gpr(0).show_rru(mb_rru); + let rn = rn.show_rru(mb_rru); + format!("{} {}, {}", op, rd, rn) + } + &Inst::UDivMod32 { rn, .. } => { + let op = "dlr"; + let rd = gpr(0).show_rru(mb_rru); + let rn = rn.show_rru(mb_rru); + format!("{} {}, {}", op, rd, rn) + } + &Inst::UDivMod64 { rn, .. } => { + let op = "dlgr"; + let rd = gpr(0).show_rru(mb_rru); + let rn = rn.show_rru(mb_rru); + format!("{} {}, {}", op, rd, rn) + } + &Inst::Flogr { rn } => { + let op = "flogr"; + let rd = gpr(0).show_rru(mb_rru); + let rn = rn.show_rru(mb_rru); + format!("{} {}, {}", op, rd, rn) + } + &Inst::ShiftRR { + shift_op, + rd, + rn, + shift_imm, + ref shift_reg, + } => { + let op = match shift_op { + ShiftOp::RotL32 => "rll", + ShiftOp::RotL64 => "rllg", + ShiftOp::LShL32 => "sllk", + ShiftOp::LShL64 => "sllg", + ShiftOp::LShR32 => "srlk", + ShiftOp::LShR64 => "srlg", + ShiftOp::AShR32 => "srak", + ShiftOp::AShR64 => "srag", + }; + let rd = rd.to_reg().show_rru(mb_rru); + let rn = rn.show_rru(mb_rru); + let shift_imm = shift_imm.show_rru(mb_rru); + let shift_reg = match shift_reg { + Some(reg) => format!("({})", reg.show_rru(mb_rru)), + None => "".to_string(), + }; + format!("{} {}, {}, {}{}", op, rd, rn, shift_imm, shift_reg) + } + &Inst::UnaryRR { op, rd, rn } => { + let (op, extra) = match op { + UnaryOp::Abs32 => ("lpr", ""), + UnaryOp::Abs64 => ("lpgr", ""), + UnaryOp::Abs64Ext32 => ("lpgfr", ""), + UnaryOp::Neg32 => ("lcr", ""), + UnaryOp::Neg64 => ("lcgr", ""), + UnaryOp::Neg64Ext32 => ("lcgfr", ""), + UnaryOp::PopcntByte => ("popcnt", ""), + UnaryOp::PopcntReg => ("popcnt", ", 8"), + }; + let rd = rd.to_reg().show_rru(mb_rru); + let rn = rn.show_rru(mb_rru); + format!("{} {}, {}{}", op, rd, rn, extra) + } + &Inst::CmpRR { op, rn, rm } => { + let op = match op { + CmpOp::CmpS32 => "cr", + CmpOp::CmpS64 => "cgr", + CmpOp::CmpS64Ext32 => "cgfr", + CmpOp::CmpL32 => "clr", + CmpOp::CmpL64 => "clgr", + CmpOp::CmpL64Ext32 => "clgfr", + _ => unreachable!(), + }; + let rn = rn.show_rru(mb_rru); + let rm = rm.show_rru(mb_rru); + format!("{} {}, {}", op, rn, rm) + } + &Inst::CmpRX { op, rn, ref mem } => { + let (opcode_rx, opcode_rxy, opcode_ril) = match op { + CmpOp::CmpS32 => (Some("c"), Some("cy"), Some("crl")), + CmpOp::CmpS32Ext16 => (Some("ch"), Some("chy"), Some("chrl")), + CmpOp::CmpS64 => (None, Some("cg"), Some("cgrl")), + CmpOp::CmpS64Ext16 => (None, Some("cgh"), Some("cghrl")), + CmpOp::CmpS64Ext32 => (None, Some("cgf"), Some("cgfrl")), + CmpOp::CmpL32 => (Some("cl"), Some("cly"), Some("clrl")), + CmpOp::CmpL32Ext16 => (None, None, Some("clhrl")), + CmpOp::CmpL64 => (None, Some("clg"), Some("clgrl")), + CmpOp::CmpL64Ext16 => (None, None, Some("clghrl")), + CmpOp::CmpL64Ext32 => (None, Some("clgf"), Some("clgfrl")), + }; + + let (mem_str, mem) = mem_finalize_for_show( + mem, + mb_rru, + state, + opcode_rx.is_some(), + opcode_rxy.is_some(), + opcode_ril.is_some(), + true, + ); + + let op = match &mem { + &MemArg::BXD12 { .. } => opcode_rx, + &MemArg::BXD20 { .. } => opcode_rxy, + &MemArg::Label { .. } | &MemArg::Symbol { .. 
} => opcode_ril, + _ => unreachable!(), + }; + + let rn = rn.show_rru(mb_rru); + let mem = mem.show_rru(mb_rru); + format!("{}{} {}, {}", mem_str, op.unwrap(), rn, mem) + } + &Inst::CmpRSImm16 { op, rn, imm } => { + let op = match op { + CmpOp::CmpS32 => "chi", + CmpOp::CmpS64 => "cghi", + _ => unreachable!(), + }; + let rn = rn.show_rru(mb_rru); + format!("{} {}, {}", op, rn, imm) + } + &Inst::CmpRSImm32 { op, rn, imm } => { + let op = match op { + CmpOp::CmpS32 => "cfi", + CmpOp::CmpS64 => "cgfi", + _ => unreachable!(), + }; + let rn = rn.show_rru(mb_rru); + format!("{} {}, {}", op, rn, imm) + } + &Inst::CmpRUImm32 { op, rn, imm } => { + let op = match op { + CmpOp::CmpL32 => "clfi", + CmpOp::CmpL64 => "clgfi", + _ => unreachable!(), + }; + let rn = rn.show_rru(mb_rru); + format!("{} {}, {}", op, rn, imm) + } + &Inst::CmpTrapRR { + op, rn, rm, cond, .. + } => { + let op = match op { + CmpOp::CmpS32 => "crt", + CmpOp::CmpS64 => "cgrt", + CmpOp::CmpL32 => "clrt", + CmpOp::CmpL64 => "clgrt", + _ => unreachable!(), + }; + let rn = rn.show_rru(mb_rru); + let rm = rm.show_rru(mb_rru); + let cond = cond.show_rru(mb_rru); + format!("{}{} {}, {}", op, cond, rn, rm) + } + &Inst::CmpTrapRSImm16 { + op, rn, imm, cond, .. + } => { + let op = match op { + CmpOp::CmpS32 => "cit", + CmpOp::CmpS64 => "cgit", + _ => unreachable!(), + }; + let rn = rn.show_rru(mb_rru); + let cond = cond.show_rru(mb_rru); + format!("{}{} {}, {}", op, cond, rn, imm) + } + &Inst::CmpTrapRUImm16 { + op, rn, imm, cond, .. + } => { + let op = match op { + CmpOp::CmpL32 => "clfit", + CmpOp::CmpL64 => "clgit", + _ => unreachable!(), + }; + let rn = rn.show_rru(mb_rru); + let cond = cond.show_rru(mb_rru); + format!("{}{} {}, {}", op, cond, rn, imm) + } + &Inst::Load32 { rd, ref mem } + | &Inst::Load32ZExt8 { rd, ref mem } + | &Inst::Load32SExt8 { rd, ref mem } + | &Inst::Load32ZExt16 { rd, ref mem } + | &Inst::Load32SExt16 { rd, ref mem } + | &Inst::Load64 { rd, ref mem } + | &Inst::Load64ZExt8 { rd, ref mem } + | &Inst::Load64SExt8 { rd, ref mem } + | &Inst::Load64ZExt16 { rd, ref mem } + | &Inst::Load64SExt16 { rd, ref mem } + | &Inst::Load64ZExt32 { rd, ref mem } + | &Inst::Load64SExt32 { rd, ref mem } + | &Inst::LoadRev16 { rd, ref mem } + | &Inst::LoadRev32 { rd, ref mem } + | &Inst::LoadRev64 { rd, ref mem } + | &Inst::FpuLoad32 { rd, ref mem } + | &Inst::FpuLoad64 { rd, ref mem } => { + let (opcode_rx, opcode_rxy, opcode_ril) = match self { + &Inst::Load32 { .. } => (Some("l"), Some("ly"), Some("lrl")), + &Inst::Load32ZExt8 { .. } => (None, Some("llc"), None), + &Inst::Load32SExt8 { .. } => (None, Some("lb"), None), + &Inst::Load32ZExt16 { .. } => (None, Some("llh"), Some("llhrl")), + &Inst::Load32SExt16 { .. } => (Some("lh"), Some("lhy"), Some("lhrl")), + &Inst::Load64 { .. } => (None, Some("lg"), Some("lgrl")), + &Inst::Load64ZExt8 { .. } => (None, Some("llgc"), None), + &Inst::Load64SExt8 { .. } => (None, Some("lgb"), None), + &Inst::Load64ZExt16 { .. } => (None, Some("llgh"), Some("llghrl")), + &Inst::Load64SExt16 { .. } => (None, Some("lgh"), Some("lghrl")), + &Inst::Load64ZExt32 { .. } => (None, Some("llgf"), Some("llgfrl")), + &Inst::Load64SExt32 { .. } => (None, Some("lgf"), Some("lgfrl")), + &Inst::LoadRev16 { .. } => (None, Some("lrvh"), None), + &Inst::LoadRev32 { .. } => (None, Some("lrv"), None), + &Inst::LoadRev64 { .. } => (None, Some("lrvg"), None), + &Inst::FpuLoad32 { .. } => (Some("le"), Some("ley"), None), + &Inst::FpuLoad64 { .. 
} => (Some("ld"), Some("ldy"), None), + _ => unreachable!(), + }; + + let (mem_str, mem) = mem_finalize_for_show( + mem, + mb_rru, + state, + opcode_rx.is_some(), + opcode_rxy.is_some(), + opcode_ril.is_some(), + true, + ); + + let op = match &mem { + &MemArg::BXD12 { .. } => opcode_rx, + &MemArg::BXD20 { .. } => opcode_rxy, + &MemArg::Label { .. } | &MemArg::Symbol { .. } => opcode_ril, + _ => unreachable!(), + }; + + let rd = rd.to_reg().show_rru(mb_rru); + let mem = mem.show_rru(mb_rru); + format!("{}{} {}, {}", mem_str, op.unwrap(), rd, mem) + } + &Inst::FpuLoadRev32 { rd, ref mem } | &Inst::FpuLoadRev64 { rd, ref mem } => { + let (mem_str, mem) = + mem_finalize_for_show(mem, mb_rru, state, true, false, false, true); + + let op = match self { + &Inst::FpuLoadRev32 { .. } => "vlebrf", + &Inst::FpuLoadRev64 { .. } => "vlebrg", + _ => unreachable!(), + }; + let rd = rd.to_reg().show_rru(mb_rru); + let mem = mem.show_rru(mb_rru); + format!("{}{} {}, {}, 0", mem_str, op, rd, mem) + } + &Inst::Store8 { rd, ref mem } + | &Inst::Store16 { rd, ref mem } + | &Inst::Store32 { rd, ref mem } + | &Inst::Store64 { rd, ref mem } + | &Inst::StoreRev16 { rd, ref mem } + | &Inst::StoreRev32 { rd, ref mem } + | &Inst::StoreRev64 { rd, ref mem } + | &Inst::FpuStore32 { rd, ref mem } + | &Inst::FpuStore64 { rd, ref mem } => { + let (opcode_rx, opcode_rxy, opcode_ril) = match self { + &Inst::Store8 { .. } => (Some("stc"), Some("stcy"), None), + &Inst::Store16 { .. } => (Some("sth"), Some("sthy"), Some("sthrl")), + &Inst::Store32 { .. } => (Some("st"), Some("sty"), Some("strl")), + &Inst::Store64 { .. } => (None, Some("stg"), Some("stgrl")), + &Inst::StoreRev16 { .. } => (None, Some("strvh"), None), + &Inst::StoreRev32 { .. } => (None, Some("strv"), None), + &Inst::StoreRev64 { .. } => (None, Some("strvg"), None), + &Inst::FpuStore32 { .. } => (Some("ste"), Some("stey"), None), + &Inst::FpuStore64 { .. } => (Some("std"), Some("stdy"), None), + _ => unreachable!(), + }; + + let (mem_str, mem) = mem_finalize_for_show( + mem, + mb_rru, + state, + opcode_rx.is_some(), + opcode_rxy.is_some(), + opcode_ril.is_some(), + true, + ); + + let op = match &mem { + &MemArg::BXD12 { .. } => opcode_rx, + &MemArg::BXD20 { .. } => opcode_rxy, + &MemArg::Label { .. } | &MemArg::Symbol { .. } => opcode_ril, + _ => unreachable!(), + }; + + let rd = rd.show_rru(mb_rru); + let mem = mem.show_rru(mb_rru); + format!("{}{} {}, {}", mem_str, op.unwrap(), rd, mem) + } + &Inst::StoreImm8 { imm, ref mem } => { + let (mem_str, mem) = + mem_finalize_for_show(mem, mb_rru, state, true, true, false, false); + let op = match &mem { + &MemArg::BXD12 { .. } => "mvi", + &MemArg::BXD20 { .. } => "mviy", + _ => unreachable!(), + }; + + let mem = mem.show_rru(mb_rru); + format!("{}{} {}, {}", mem_str, op, mem, imm) + } + &Inst::StoreImm16 { imm, ref mem } + | &Inst::StoreImm32SExt16 { imm, ref mem } + | &Inst::StoreImm64SExt16 { imm, ref mem } => { + let (mem_str, mem) = + mem_finalize_for_show(mem, mb_rru, state, false, true, false, false); + let op = match self { + &Inst::StoreImm16 { .. } => "mvhhi", + &Inst::StoreImm32SExt16 { .. } => "mvhi", + &Inst::StoreImm64SExt16 { .. } => "mvghi", + _ => unreachable!(), + }; + + let mem = mem.show_rru(mb_rru); + format!("{}{} {}, {}", mem_str, op, mem, imm) + } + &Inst::FpuStoreRev32 { rd, ref mem } | &Inst::FpuStoreRev64 { rd, ref mem } => { + let (mem_str, mem) = + mem_finalize_for_show(mem, mb_rru, state, true, false, false, true); + + let op = match self { + &Inst::FpuStoreRev32 { .. 
} => "vstebrf", + &Inst::FpuStoreRev64 { .. } => "vstebrg", + _ => unreachable!(), + }; + let rd = rd.show_rru(mb_rru); + let mem = mem.show_rru(mb_rru); + format!("{}{} {}, {}, 0", mem_str, op, rd, mem) + } + &Inst::LoadMultiple64 { + rt, + rt2, + addr_reg, + addr_off, + } => { + let rt = rt.show_rru(mb_rru); + let rt2 = rt2.show_rru(mb_rru); + let addr_reg = addr_reg.show_rru(mb_rru); + let addr_off = addr_off.show_rru(mb_rru); + format!("lmg {}, {}, {}({})", rt, rt2, addr_off, addr_reg) + } + &Inst::StoreMultiple64 { + rt, + rt2, + addr_reg, + addr_off, + } => { + let rt = rt.show_rru(mb_rru); + let rt2 = rt2.show_rru(mb_rru); + let addr_reg = addr_reg.show_rru(mb_rru); + let addr_off = addr_off.show_rru(mb_rru); + format!("stmg {}, {}, {}({})", rt, rt2, addr_off, addr_reg) + } + &Inst::Mov64 { rd, rm } => { + let rd = rd.to_reg().show_rru(mb_rru); + let rm = rm.show_rru(mb_rru); + format!("lgr {}, {}", rd, rm) + } + &Inst::Mov32 { rd, rm } => { + let rd = rd.to_reg().show_rru(mb_rru); + let rm = rm.show_rru(mb_rru); + format!("lr {}, {}", rd, rm) + } + &Inst::Mov32Imm { rd, ref imm } => { + let rd = rd.to_reg().show_rru(mb_rru); + format!("iilf {}, {}", rd, imm) + } + &Inst::Mov32SImm16 { rd, ref imm } => { + let rd = rd.to_reg().show_rru(mb_rru); + format!("lhi {}, {}", rd, imm) + } + &Inst::Mov64SImm16 { rd, ref imm } => { + let rd = rd.to_reg().show_rru(mb_rru); + format!("lghi {}, {}", rd, imm) + } + &Inst::Mov64SImm32 { rd, ref imm } => { + let rd = rd.to_reg().show_rru(mb_rru); + format!("lgfi {}, {}", rd, imm) + } + &Inst::Mov64UImm16Shifted { rd, ref imm } => { + let rd = rd.to_reg().show_rru(mb_rru); + let op = match imm.shift { + 0 => "llill", + 1 => "llilh", + 2 => "llihl", + 3 => "llihh", + _ => unreachable!(), + }; + format!("{} {}, {}", op, rd, imm.bits) + } + &Inst::Mov64UImm32Shifted { rd, ref imm } => { + let rd = rd.to_reg().show_rru(mb_rru); + let op = match imm.shift { + 0 => "llilf", + 1 => "llihf", + _ => unreachable!(), + }; + format!("{} {}, {}", op, rd, imm.bits) + } + &Inst::Insert64UImm16Shifted { rd, ref imm } => { + let rd = rd.to_reg().show_rru(mb_rru); + let op = match imm.shift { + 0 => "iill", + 1 => "iilh", + 2 => "iihl", + 3 => "iihh", + _ => unreachable!(), + }; + format!("{} {}, {}", op, rd, imm.bits) + } + &Inst::Insert64UImm32Shifted { rd, ref imm } => { + let rd = rd.to_reg().show_rru(mb_rru); + let op = match imm.shift { + 0 => "iilf", + 1 => "iihf", + _ => unreachable!(), + }; + format!("{} {}, {}", op, rd, imm.bits) + } + &Inst::CMov32 { rd, cond, rm } => { + let rd = rd.to_reg().show_rru(mb_rru); + let rm = rm.show_rru(mb_rru); + let cond = cond.show_rru(mb_rru); + format!("locr{} {}, {}", cond, rd, rm) + } + &Inst::CMov64 { rd, cond, rm } => { + let rd = rd.to_reg().show_rru(mb_rru); + let rm = rm.show_rru(mb_rru); + let cond = cond.show_rru(mb_rru); + format!("locgr{} {}, {}", cond, rd, rm) + } + &Inst::CMov32SImm16 { rd, cond, ref imm } => { + let rd = rd.to_reg().show_rru(mb_rru); + let cond = cond.show_rru(mb_rru); + format!("lochi{} {}, {}", cond, rd, imm) + } + &Inst::CMov64SImm16 { rd, cond, ref imm } => { + let rd = rd.to_reg().show_rru(mb_rru); + let cond = cond.show_rru(mb_rru); + format!("locghi{} {}, {}", cond, rd, imm) + } + &Inst::FpuMove32 { rd, rn } => { + let rd = rd.to_reg().show_rru(mb_rru); + let rn = rn.show_rru(mb_rru); + format!("ler {}, {}", rd, rn) + } + &Inst::FpuMove64 { rd, rn } => { + let rd = rd.to_reg().show_rru(mb_rru); + let rn = rn.show_rru(mb_rru); + format!("ldr {}, {}", rd, rn) + } + &Inst::FpuCMov32 { 
rd, cond, rm } => { + let rd = rd.to_reg().show_rru(mb_rru); + let rm = rm.show_rru(mb_rru); + let cond = cond.invert().show_rru(mb_rru); + format!("j{} 6 ; ler {}, {}", cond, rd, rm) + } + &Inst::FpuCMov64 { rd, cond, rm } => { + let rd = rd.to_reg().show_rru(mb_rru); + let rm = rm.show_rru(mb_rru); + let cond = cond.invert().show_rru(mb_rru); + format!("j{} 6 ; ldr {}, {}", cond, rd, rm) + } + &Inst::MovToFpr { rd, rn } => { + let rd = rd.to_reg().show_rru(mb_rru); + let rn = rn.show_rru(mb_rru); + format!("ldgr {}, {}", rd, rn) + } + &Inst::MovFromFpr { rd, rn } => { + let rd = rd.to_reg().show_rru(mb_rru); + let rn = rn.show_rru(mb_rru); + format!("lgdr {}, {}", rd, rn) + } + &Inst::FpuRR { fpu_op, rd, rn } => { + let op = match fpu_op { + FPUOp1::Abs32 => "lpebr", + FPUOp1::Abs64 => "lpdbr", + FPUOp1::Neg32 => "lcebr", + FPUOp1::Neg64 => "lcdbr", + FPUOp1::NegAbs32 => "lnebr", + FPUOp1::NegAbs64 => "lndbr", + FPUOp1::Sqrt32 => "sqebr", + FPUOp1::Sqrt64 => "sqdbr", + FPUOp1::Cvt32To64 => "ldebr", + FPUOp1::Cvt64To32 => "ledbr", + }; + let rd = rd.to_reg().show_rru(mb_rru); + let rn = rn.show_rru(mb_rru); + format!("{} {}, {}", op, rd, rn) + } + &Inst::FpuRRR { fpu_op, rd, rm } => { + let op = match fpu_op { + FPUOp2::Add32 => "aebr", + FPUOp2::Add64 => "adbr", + FPUOp2::Sub32 => "sebr", + FPUOp2::Sub64 => "sdbr", + FPUOp2::Mul32 => "meebr", + FPUOp2::Mul64 => "mdbr", + FPUOp2::Div32 => "debr", + FPUOp2::Div64 => "ddbr", + _ => unimplemented!(), + }; + let rd = rd.to_reg().show_rru(mb_rru); + let rm = rm.show_rru(mb_rru); + format!("{} {}, {}", op, rd, rm) + } + &Inst::FpuRRRR { fpu_op, rd, rn, rm } => { + let op = match fpu_op { + FPUOp3::MAdd32 => "maebr", + FPUOp3::MAdd64 => "madbr", + FPUOp3::MSub32 => "msebr", + FPUOp3::MSub64 => "msdbr", + }; + let rd = rd.to_reg().show_rru(mb_rru); + let rn = rn.show_rru(mb_rru); + let rm = rm.show_rru(mb_rru); + format!("{} {}, {}, {}", op, rd, rn, rm) + } + &Inst::FpuCopysign { rd, rn, rm } => { + let rd = rd.to_reg().show_rru(mb_rru); + let rn = rn.show_rru(mb_rru); + let rm = rm.show_rru(mb_rru); + format!("cpsdr {}, {}, {}", rd, rm, rn) + } + &Inst::FpuCmp32 { rn, rm } => { + let rn = rn.show_rru(mb_rru); + let rm = rm.show_rru(mb_rru); + format!("cebr {}, {}", rn, rm) + } + &Inst::FpuCmp64 { rn, rm } => { + let rn = rn.show_rru(mb_rru); + let rm = rm.show_rru(mb_rru); + format!("cdbr {}, {}", rn, rm) + } + &Inst::LoadFpuConst32 { rd, const_data } => { + let rd = rd.to_reg().show_rru(mb_rru); + let tmp = writable_spilltmp_reg().to_reg().show_rru(mb_rru); + format!( + "bras {}, 8 ; data.f32 {} ; le {}, 0({})", + tmp, const_data, rd, tmp + ) + } + &Inst::LoadFpuConst64 { rd, const_data } => { + let rd = rd.to_reg().show_rru(mb_rru); + let tmp = writable_spilltmp_reg().to_reg().show_rru(mb_rru); + format!( + "bras {}, 12 ; data.f64 {} ; ld {}, 0({})", + tmp, const_data, rd, tmp + ) + } + &Inst::FpuToInt { op, rd, rn } => { + let op = match op { + FpuToIntOp::F32ToI32 => "cfebra", + FpuToIntOp::F32ToU32 => "clfebr", + FpuToIntOp::F32ToI64 => "cgebra", + FpuToIntOp::F32ToU64 => "clgebr", + FpuToIntOp::F64ToI32 => "cfdbra", + FpuToIntOp::F64ToU32 => "clfdbr", + FpuToIntOp::F64ToI64 => "cgdbra", + FpuToIntOp::F64ToU64 => "clgdbr", + }; + let rd = rd.to_reg().show_rru(mb_rru); + let rn = rn.show_rru(mb_rru); + format!("{} {}, 5, {}, 0", op, rd, rn) + } + &Inst::IntToFpu { op, rd, rn } => { + let op = match op { + IntToFpuOp::I32ToF32 => "cefbra", + IntToFpuOp::U32ToF32 => "celfbr", + IntToFpuOp::I64ToF32 => "cegbra", + IntToFpuOp::U64ToF32 => 
"celgbr", + IntToFpuOp::I32ToF64 => "cdfbra", + IntToFpuOp::U32ToF64 => "cdlfbr", + IntToFpuOp::I64ToF64 => "cdgbra", + IntToFpuOp::U64ToF64 => "cdlgbr", + }; + let rd = rd.to_reg().show_rru(mb_rru); + let rn = rn.show_rru(mb_rru); + format!("{} {}, 0, {}, 0", op, rd, rn) + } + &Inst::FpuRound { op, rd, rn } => { + let (op, m3) = match op { + FpuRoundMode::Minus32 => ("fiebr", 7), + FpuRoundMode::Minus64 => ("fidbr", 7), + FpuRoundMode::Plus32 => ("fiebr", 6), + FpuRoundMode::Plus64 => ("fidbr", 6), + FpuRoundMode::Zero32 => ("fiebr", 5), + FpuRoundMode::Zero64 => ("fidbr", 5), + FpuRoundMode::Nearest32 => ("fiebr", 4), + FpuRoundMode::Nearest64 => ("fidbr", 4), + }; + let rd = rd.to_reg().show_rru(mb_rru); + let rn = rn.show_rru(mb_rru); + format!("{} {}, {}, {}", op, rd, rn, m3) + } + &Inst::FpuVecRRR { fpu_op, rd, rn, rm } => { + let op = match fpu_op { + FPUOp2::Max32 => "wfmaxsb", + FPUOp2::Max64 => "wfmaxdb", + FPUOp2::Min32 => "wfminsb", + FPUOp2::Min64 => "wfmindb", + _ => unimplemented!(), + }; + let rd = rd.to_reg().show_rru(mb_rru); + let rn = rn.show_rru(mb_rru); + let rm = rm.show_rru(mb_rru); + format!("{} {}, {}, {}, 1", op, rd, rn, rm) + } + &Inst::Extend { + rd, + rn, + signed, + from_bits, + to_bits, + } => { + let rd = rd.to_reg().show_rru(mb_rru); + let rn = rn.show_rru(mb_rru); + let op = match (signed, from_bits, to_bits) { + (_, 1, 32) => "llcr", + (_, 1, 64) => "llgcr", + (false, 8, 32) => "llcr", + (false, 8, 64) => "llgcr", + (true, 8, 32) => "lbr", + (true, 8, 64) => "lgbr", + (false, 16, 32) => "llhr", + (false, 16, 64) => "llghr", + (true, 16, 32) => "lhr", + (true, 16, 64) => "lghr", + (false, 32, 64) => "llgfr", + (true, 32, 64) => "lgfr", + _ => panic!("Unsupported Extend case: {:?}", self), + }; + format!("{} {}, {}", op, rd, rn) + } + &Inst::Call { link, ref info, .. } => { + let link = link.show_rru(mb_rru); + format!("brasl {}, {}", link, info.dest) + } + &Inst::CallInd { link, ref info, .. } => { + let link = link.show_rru(mb_rru); + let rn = info.rn.show_rru(mb_rru); + format!("basr {}, {}", link, rn) + } + &Inst::Ret { link } => { + let link = link.show_rru(mb_rru); + format!("br {}", link) + } + &Inst::EpiloguePlaceholder => "epilogue placeholder".to_string(), + &Inst::Jump { ref dest } => { + let dest = dest.show_rru(mb_rru); + format!("jg {}", dest) + } + &Inst::IndirectBr { rn, .. } => { + let rn = rn.show_rru(mb_rru); + format!("br {}", rn) + } + &Inst::CondBr { + ref taken, + ref not_taken, + cond, + } => { + let taken = taken.show_rru(mb_rru); + let not_taken = not_taken.show_rru(mb_rru); + let cond = cond.show_rru(mb_rru); + format!("jg{} {} ; jg {}", cond, taken, not_taken) + } + &Inst::OneWayCondBr { ref target, cond } => { + let target = target.show_rru(mb_rru); + let cond = cond.show_rru(mb_rru); + format!("jg{} {}", cond, target) + } + &Inst::Debugtrap => "debugtrap".to_string(), + &Inst::Trap { .. } => "trap".to_string(), + &Inst::TrapIf { cond, .. } => { + let cond = cond.invert().show_rru(mb_rru); + format!("j{} 6 ; trap", cond) + } + &Inst::JTSequence { + ref info, + ridx, + rtmp1, + rtmp2, + .. 
+            } => {
+                let ridx = ridx.show_rru(mb_rru);
+                let rtmp1 = rtmp1.show_rru(mb_rru);
+                let rtmp2 = rtmp2.show_rru(mb_rru);
+                let default_target = info.default_target.show_rru(mb_rru);
+                format!(
+                    concat!(
+                        "clgfi {}, {} ; ",
+                        "jghe {} ; ",
+                        "sllg {}, {}, 2 ; ",
+                        "larl {}, 18 ; ",
+                        "lgf {}, 0({}, {}) ; ",
+                        "agrk {}, {}, {} ; ",
+                        "br {} ; ",
+                        "jt_entries {:?}"
+                    ),
+                    ridx,
+                    info.targets.len(),
+                    default_target,
+                    rtmp2,
+                    ridx,
+                    rtmp1,
+                    rtmp2,
+                    rtmp2,
+                    rtmp1,
+                    rtmp1,
+                    rtmp1,
+                    rtmp2,
+                    rtmp1,
+                    info.targets
+                )
+            }
+            &Inst::LoadExtNameFar {
+                rd,
+                ref name,
+                offset,
+            } => {
+                let rd = rd.show_rru(mb_rru);
+                let tmp = writable_spilltmp_reg().to_reg().show_rru(mb_rru);
+                format!(
+                    "bras {}, 12 ; data {} + {} ; lg {}, 0({})",
+                    tmp, name, offset, rd, tmp
+                )
+            }
+            &Inst::LoadAddr { rd, ref mem } => {
+                let (mem_str, mem) =
+                    mem_finalize_for_show(mem, mb_rru, state, true, true, true, true);
+
+                let op = match &mem {
+                    &MemArg::BXD12 { .. } => "la",
+                    &MemArg::BXD20 { .. } => "lay",
+                    &MemArg::Label { .. } | &MemArg::Symbol { .. } => "larl",
+                    _ => unreachable!(),
+                };
+                let rd = rd.show_rru(mb_rru);
+                let mem = mem.show_rru(mb_rru);
+                format!("{}{} {}, {}", mem_str, op, rd, mem)
+            }
+            &Inst::VirtualSPOffsetAdj { offset } => {
+                state.virtual_sp_offset += offset;
+                format!("virtual_sp_offset_adjust {}", offset)
+            }
+            &Inst::ValueLabelMarker { label, reg } => {
+                format!("value_label {:?}, {}", label, reg.show_rru(mb_rru))
+            }
+            &Inst::Unwind { ref inst } => {
+                format!("unwind {:?}", inst)
+            }
+        }
+    }
+}
+
+//=============================================================================
+// Label fixups and jump veneers.
+
+/// Different forms of label references for different instruction formats.
+#[derive(Clone, Copy, Debug, PartialEq, Eq)]
+pub enum LabelUse {
+    /// RI-format branch. 16-bit signed offset. PC-relative, offset is imm << 1.
+    BranchRI,
+    /// RIL-format branch. 32-bit signed offset. PC-relative, offset is imm << 1.
+    BranchRIL,
+    /// 32-bit PC relative constant offset (from address of constant itself),
+    /// signed. Used in jump tables.
+    PCRel32,
+}
+
+impl MachInstLabelUse for LabelUse {
+    /// Alignment for veneer code.
+    const ALIGN: CodeOffset = 2;
+
+    /// Maximum PC-relative range (positive), inclusive.
+    fn max_pos_range(self) -> CodeOffset {
+        match self {
+            // 16-bit signed immediate, left-shifted by 1.
+            LabelUse::BranchRI => ((1 << 15) - 1) << 1,
+            // This can address any valid CodeOffset.
+            LabelUse::BranchRIL => 0x7fff_ffff,
+            LabelUse::PCRel32 => 0x7fff_ffff,
+        }
+    }
+
+    /// Maximum PC-relative range (negative).
+    fn max_neg_range(self) -> CodeOffset {
+        match self {
+            // 16-bit signed immediate, left-shifted by 1.
+            LabelUse::BranchRI => (1 << 15) << 1,
+            // This can address any valid CodeOffset.
+            LabelUse::BranchRIL => 0x8000_0000,
+            LabelUse::PCRel32 => 0x8000_0000,
+        }
+    }
+
+    /// Size of window into code needed to do the patch.
+    fn patch_size(self) -> CodeOffset {
+        match self {
+            LabelUse::BranchRI => 4,
+            LabelUse::BranchRIL => 6,
+            LabelUse::PCRel32 => 4,
+        }
+    }
+
+    /// Perform the patch.
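+    ///
+    /// Illustrative arithmetic (derived from the code below): a `BranchRIL`
+    /// use at code offset 0x1000 whose label resolves to offset 0x500 gives
+    /// pc_rel = -0xb00; the halfword count -0x580 (0xfffffa80 as u32) is then
+    /// stored big-endian into bytes 2..6 of the instruction.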
+    fn patch(self, buffer: &mut [u8], use_offset: CodeOffset, label_offset: CodeOffset) {
+        let pc_rel = (label_offset as i64) - (use_offset as i64);
+        debug_assert!(pc_rel <= self.max_pos_range() as i64);
+        debug_assert!(pc_rel >= -(self.max_neg_range() as i64));
+        debug_assert!(pc_rel & 1 == 0);
+        let pc_rel_shifted = pc_rel >> 1;
+
+        match self {
+            LabelUse::BranchRI => {
+                buffer[2..4].clone_from_slice(&u16::to_be_bytes(pc_rel_shifted as u16));
+            }
+            LabelUse::BranchRIL => {
+                buffer[2..6].clone_from_slice(&u32::to_be_bytes(pc_rel_shifted as u32));
+            }
+            LabelUse::PCRel32 => {
+                let insn_word = u32::from_be_bytes([buffer[0], buffer[1], buffer[2], buffer[3]]);
+                let insn_word = insn_word.wrapping_add(pc_rel as u32);
+                buffer[0..4].clone_from_slice(&u32::to_be_bytes(insn_word));
+            }
+        }
+    }
+
+    /// Is a veneer supported for this label reference type?
+    fn supports_veneer(self) -> bool {
+        false
+    }
+
+    /// How large is the veneer, if supported?
+    fn veneer_size(self) -> CodeOffset {
+        0
+    }
+
+    /// Generate a veneer into the buffer, given that this veneer is at `veneer_offset`, and return
+    /// an offset and label-use for the veneer's use of the original label.
+    fn generate_veneer(
+        self,
+        _buffer: &mut [u8],
+        _veneer_offset: CodeOffset,
+    ) -> (CodeOffset, LabelUse) {
+        unreachable!();
+    }
+}
diff --git a/cranelift/codegen/src/isa/s390x/inst/regs.rs b/cranelift/codegen/src/isa/s390x/inst/regs.rs
new file mode 100644
index 0000000000..3ebba43bda
--- /dev/null
+++ b/cranelift/codegen/src/isa/s390x/inst/regs.rs
@@ -0,0 +1,168 @@
+//! S390x ISA definitions: registers.
+
+use crate::settings;
+use regalloc::{RealRegUniverse, Reg, RegClass, RegClassInfo, Writable, NUM_REG_CLASSES};
+
+//=============================================================================
+// Registers, the Universe thereof, and printing
+
+#[rustfmt::skip]
+const GPR_INDICES: [u8; 16] = [
+    // r0 and r1 reserved
+    30, 31,
+    // r2 - r5 call-clobbered
+    16, 17, 18, 19,
+    // r6 - r14 call-saved (order reversed)
+    28, 27, 26, 25, 24, 23, 22, 21, 20,
+    // r15 (SP)
+    29,
+];
+
+#[rustfmt::skip]
+const FPR_INDICES: [u8; 16] = [
+    // f0 - f7 as pairs
+    0, 4, 1, 5, 2, 6, 3, 7,
+    // f8 - f15 as pairs
+    8, 12, 9, 13, 10, 14, 11, 15,
+];
+
+/// Get a reference to a GPR (integer register).
+pub fn gpr(num: u8) -> Reg {
+    assert!(num < 16);
+    Reg::new_real(
+        RegClass::I64,
+        /* enc = */ num,
+        /* index = */ GPR_INDICES[num as usize],
+    )
+}
+
+/// Get a writable reference to a GPR.
+pub fn writable_gpr(num: u8) -> Writable<Reg> {
+    Writable::from_reg(gpr(num))
+}
+
+/// Get a reference to an FPR (floating-point register).
+pub fn fpr(num: u8) -> Reg {
+    assert!(num < 16);
+    Reg::new_real(
+        RegClass::F64,
+        /* enc = */ num,
+        /* index = */ FPR_INDICES[num as usize],
+    )
+}
+
+/// Get a writable reference to an FPR.
+pub fn writable_fpr(num: u8) -> Writable<Reg> {
+    Writable::from_reg(fpr(num))
+}
+
+/// Get a reference to the stack-pointer register.
+pub fn stack_reg() -> Reg {
+    gpr(15)
+}
+
+/// Get a writable reference to the stack-pointer register.
+pub fn writable_stack_reg() -> Writable<Reg> {
+    Writable::from_reg(stack_reg())
+}
+
+/// Get a reference to the first temporary, sometimes "spill temporary", register. This register is
+/// used to compute the address of a spill slot when a direct offset addressing mode from FP is not
+/// sufficient (+/- 2^19 bytes). We exclude this register from regalloc and reserve it for this
+/// purpose for simplicity; otherwise we need a multi-stage analysis where we first determine how
+/// many spill slots we have, then perhaps remove the reg from the pool and recompute regalloc.
+///
+/// We use r1 for this because it's a scratch register but is slightly special (used for linker
+/// veneers). We're free to use it as long as we don't expect it to live through call instructions.
+pub fn spilltmp_reg() -> Reg {
+    gpr(1)
+}
+
+/// Get a writable reference to the spilltmp reg.
+pub fn writable_spilltmp_reg() -> Writable<Reg> {
+    Writable::from_reg(spilltmp_reg())
+}
+
+pub fn zero_reg() -> Reg {
+    gpr(0)
+}
+
+/// Create the register universe for s390x.
+pub fn create_reg_universe(_flags: &settings::Flags) -> RealRegUniverse {
+    let mut regs = vec![];
+    let mut allocable_by_class = [None; NUM_REG_CLASSES];
+
+    // Numbering Scheme: we put FPRs first, then GPRs. The GPRs exclude several registers:
+    //   r0 (we cannot use this for addressing // FIXME regalloc)
+    //   r1 (spilltmp)
+    //   r15 (stack pointer)
+
+    // FPRs.
+    let mut base = regs.len();
+    regs.push((fpr(0).to_real_reg(), "%f0".into()));
+    regs.push((fpr(2).to_real_reg(), "%f2".into()));
+    regs.push((fpr(4).to_real_reg(), "%f4".into()));
+    regs.push((fpr(6).to_real_reg(), "%f6".into()));
+    regs.push((fpr(1).to_real_reg(), "%f1".into()));
+    regs.push((fpr(3).to_real_reg(), "%f3".into()));
+    regs.push((fpr(5).to_real_reg(), "%f5".into()));
+    regs.push((fpr(7).to_real_reg(), "%f7".into()));
+    regs.push((fpr(8).to_real_reg(), "%f8".into()));
+    regs.push((fpr(10).to_real_reg(), "%f10".into()));
+    regs.push((fpr(12).to_real_reg(), "%f12".into()));
+    regs.push((fpr(14).to_real_reg(), "%f14".into()));
+    regs.push((fpr(9).to_real_reg(), "%f9".into()));
+    regs.push((fpr(11).to_real_reg(), "%f11".into()));
+    regs.push((fpr(13).to_real_reg(), "%f13".into()));
+    regs.push((fpr(15).to_real_reg(), "%f15".into()));
+
+    allocable_by_class[RegClass::F64.rc_to_usize()] = Some(RegClassInfo {
+        first: base,
+        last: regs.len() - 1,
+        suggested_scratch: Some(fpr(1).get_index()),
+    });
+
+    // Caller-saved GPRs in the SystemV s390x ABI.
+    base = regs.len();
+    regs.push((gpr(2).to_real_reg(), "%r2".into()));
+    regs.push((gpr(3).to_real_reg(), "%r3".into()));
+    regs.push((gpr(4).to_real_reg(), "%r4".into()));
+    regs.push((gpr(5).to_real_reg(), "%r5".into()));
+
+    // Callee-saved GPRs in the SystemV s390x ABI.
+    // We start from r14 downwards in an attempt to allow the
+    // prolog to use as short a STMG as possible.
+    regs.push((gpr(14).to_real_reg(), "%r14".into()));
+    regs.push((gpr(13).to_real_reg(), "%r13".into()));
+    regs.push((gpr(12).to_real_reg(), "%r12".into()));
+    regs.push((gpr(11).to_real_reg(), "%r11".into()));
+    regs.push((gpr(10).to_real_reg(), "%r10".into()));
+    regs.push((gpr(9).to_real_reg(), "%r9".into()));
+    regs.push((gpr(8).to_real_reg(), "%r8".into()));
+    regs.push((gpr(7).to_real_reg(), "%r7".into()));
+    regs.push((gpr(6).to_real_reg(), "%r6".into()));
+
+    allocable_by_class[RegClass::I64.rc_to_usize()] = Some(RegClassInfo {
+        first: base,
+        last: regs.len() - 1,
+        suggested_scratch: Some(gpr(13).get_index()),
+    });
+
+    // Other regs, not available to the allocator.
+    let allocable = regs.len();
+    regs.push((gpr(15).to_real_reg(), "%r15".into()));
+    regs.push((gpr(0).to_real_reg(), "%r0".into()));
+    regs.push((gpr(1).to_real_reg(), "%r1".into()));
+
+    // Assert sanity: the indices in the register structs must match their
+    // actual indices in the array.
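+    // For example, gpr(2) must map to index 16: the sixteen FPRs pushed above
+    // occupy indices 0..=15, and %r2 is the first GPR pushed, which is exactly
+    // what GPR_INDICES[2] records.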
+ for (i, reg) in regs.iter().enumerate() { + assert_eq!(i, reg.0.get_index()); + } + + RealRegUniverse { + regs, + allocable, + allocable_by_class, + } +} diff --git a/cranelift/codegen/src/isa/s390x/inst/unwind.rs b/cranelift/codegen/src/isa/s390x/inst/unwind.rs new file mode 100644 index 0000000000..1e2bb904db --- /dev/null +++ b/cranelift/codegen/src/isa/s390x/inst/unwind.rs @@ -0,0 +1,2 @@ +#[cfg(feature = "unwind")] +pub(crate) mod systemv; diff --git a/cranelift/codegen/src/isa/s390x/inst/unwind/systemv.rs b/cranelift/codegen/src/isa/s390x/inst/unwind/systemv.rs new file mode 100644 index 0000000000..f5ff00cbd1 --- /dev/null +++ b/cranelift/codegen/src/isa/s390x/inst/unwind/systemv.rs @@ -0,0 +1,197 @@ +//! Unwind information for System V ABI (s390x). + +use crate::isa::unwind::systemv::RegisterMappingError; +use gimli::{write::CommonInformationEntry, Encoding, Format, Register}; +use regalloc::{Reg, RegClass}; + +/// Creates a new s390x common information entry (CIE). +pub fn create_cie() -> CommonInformationEntry { + use gimli::write::CallFrameInstruction; + + let mut entry = CommonInformationEntry::new( + Encoding { + address_size: 8, + format: Format::Dwarf32, + version: 1, + }, + 1, // Code alignment factor + -8, // Data alignment factor + Register(14), // Return address column - register %r14 + ); + + // Every frame will start with the call frame address (CFA) at %r15 + 160. + entry.add_instruction(CallFrameInstruction::Cfa(Register(15), 160)); + + entry +} + +/// Map Cranelift registers to their corresponding Gimli registers. +pub fn map_reg(reg: Reg) -> Result { + const GPR_MAP: [gimli::Register; 16] = [ + Register(0), + Register(1), + Register(2), + Register(3), + Register(4), + Register(5), + Register(6), + Register(7), + Register(8), + Register(9), + Register(10), + Register(11), + Register(12), + Register(13), + Register(14), + Register(15), + ]; + const FPR_MAP: [gimli::Register; 16] = [ + Register(16), + Register(20), + Register(17), + Register(21), + Register(18), + Register(22), + Register(19), + Register(23), + Register(24), + Register(28), + Register(25), + Register(29), + Register(26), + Register(30), + Register(27), + Register(31), + ]; + + match reg.get_class() { + RegClass::I64 => Ok(GPR_MAP[reg.get_hw_encoding() as usize]), + RegClass::F64 => Ok(FPR_MAP[reg.get_hw_encoding() as usize]), + _ => Err(RegisterMappingError::UnsupportedRegisterBank("class?")), + } +} + +pub(crate) struct RegisterMapper; + +impl crate::isa::unwind::systemv::RegisterMapper for RegisterMapper { + fn map(&self, reg: Reg) -> Result { + Ok(map_reg(reg)?.0) + } + fn sp(&self) -> u16 { + Register(15).0 + } +} + +#[cfg(test)] +mod tests { + use crate::cursor::{Cursor, FuncCursor}; + use crate::ir::{ + types, AbiParam, ExternalName, Function, InstBuilder, Signature, StackSlotData, + StackSlotKind, + }; + use crate::isa::{lookup, CallConv}; + use crate::settings::{builder, Flags}; + use crate::Context; + use gimli::write::Address; + use std::str::FromStr; + use target_lexicon::triple; + + #[test] + fn test_simple_func() { + let isa = lookup(triple!("s390x")) + .expect("expect s390x ISA") + .finish(Flags::new(builder())); + + let mut context = Context::for_function(create_function( + CallConv::SystemV, + Some(StackSlotData::new(StackSlotKind::ExplicitSlot, 64)), + )); + + context.compile(&*isa).expect("expected compilation"); + + let fde = match context + .create_unwind_info(isa.as_ref()) + .expect("can create unwind info") + { + Some(crate::isa::unwind::UnwindInfo::SystemV(info)) => { + 
info.to_fde(Address::Constant(1234)) + } + _ => panic!("expected unwind information"), + }; + + assert_eq!(format!("{:?}", fde), "FrameDescriptionEntry { address: Constant(1234), length: 10, lsda: None, instructions: [(4, CfaOffset(224))] }"); + } + + fn create_function(call_conv: CallConv, stack_slot: Option) -> Function { + let mut func = + Function::with_name_signature(ExternalName::user(0, 0), Signature::new(call_conv)); + + let block0 = func.dfg.make_block(); + let mut pos = FuncCursor::new(&mut func); + pos.insert_block(block0); + pos.ins().return_(&[]); + + if let Some(stack_slot) = stack_slot { + func.stack_slots.push(stack_slot); + } + + func + } + + #[test] + fn test_multi_return_func() { + let isa = lookup(triple!("s390x")) + .expect("expect s390x ISA") + .finish(Flags::new(builder())); + + let mut context = Context::for_function(create_multi_return_function( + CallConv::SystemV, + Some(StackSlotData::new(StackSlotKind::ExplicitSlot, 64)), + )); + + context.compile(&*isa).expect("expected compilation"); + + let fde = match context + .create_unwind_info(isa.as_ref()) + .expect("can create unwind info") + { + Some(crate::isa::unwind::UnwindInfo::SystemV(info)) => { + info.to_fde(Address::Constant(4321)) + } + _ => panic!("expected unwind information"), + }; + + assert_eq!(format!("{:?}", fde), "FrameDescriptionEntry { address: Constant(4321), length: 26, lsda: None, instructions: [(4, CfaOffset(224))] }"); + } + + fn create_multi_return_function( + call_conv: CallConv, + stack_slot: Option, + ) -> Function { + let mut sig = Signature::new(call_conv); + sig.params.push(AbiParam::new(types::I32)); + let mut func = Function::with_name_signature(ExternalName::user(0, 0), sig); + + let block0 = func.dfg.make_block(); + let v0 = func.dfg.append_block_param(block0, types::I32); + let block1 = func.dfg.make_block(); + let block2 = func.dfg.make_block(); + + let mut pos = FuncCursor::new(&mut func); + pos.insert_block(block0); + pos.ins().brnz(v0, block2, &[]); + pos.ins().jump(block1, &[]); + + pos.insert_block(block1); + pos.ins().return_(&[]); + + pos.insert_block(block2); + pos.ins().return_(&[]); + + if let Some(stack_slot) = stack_slot { + func.stack_slots.push(stack_slot); + } + + func + } +} diff --git a/cranelift/codegen/src/isa/s390x/lower.rs b/cranelift/codegen/src/isa/s390x/lower.rs new file mode 100644 index 0000000000..26276f0434 --- /dev/null +++ b/cranelift/codegen/src/isa/s390x/lower.rs @@ -0,0 +1,2839 @@ +//! Lowering rules for S390x. + +use crate::ir::condcodes::{FloatCC, IntCC}; +use crate::ir::Inst as IRInst; +use crate::ir::{types, Endianness, InstructionData, MemFlags, Opcode, TrapCode, Type}; +use crate::isa::s390x::abi::*; +use crate::isa::s390x::inst::*; +use crate::isa::s390x::S390xBackend; +use crate::machinst::lower::*; +use crate::machinst::*; +use crate::settings::Flags; +use crate::CodegenResult; +use alloc::boxed::Box; +use alloc::vec::Vec; +use core::convert::TryFrom; +use regalloc::{Reg, Writable}; +use smallvec::SmallVec; + +//============================================================================= +// Helpers for instruction lowering. 
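+//
+// These matchers let each opcode case in lower_insn_to_regs() pick the most
+// specific s390x instruction form available. As an illustrative sketch (the
+// mnemonics are examples, not a complete list): `iadd x, (iconst 7)` should
+// lower to a single add-immediate such as `aghi`, and an add whose second
+// operand is a sinkable load should become a single add-from-memory of the
+// `a`/`ag` family, rather than materializing the operand into a register first.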
+ +fn ty_is_int(ty: Type) -> bool { + match ty { + types::B1 | types::B8 | types::B16 | types::B32 | types::B64 => true, + types::I8 | types::I16 | types::I32 | types::I64 | types::R64 => true, + types::F32 | types::F64 => false, + types::IFLAGS | types::FFLAGS => panic!("Unexpected flags type"), + _ => panic!("ty_is_int() on unknown type: {:?}", ty), + } +} + +fn ty_is_float(ty: Type) -> bool { + !ty_is_int(ty) +} + +fn choose_32_64(ty: Type, op32: T, op64: T) -> T { + let bits = ty_bits(ty); + if bits <= 32 { + op32 + } else if bits == 64 { + op64 + } else { + panic!("choose_32_64 on > 64 bits!") + } +} + +//============================================================================ +// Lowering: convert instruction inputs to forms that we can use. + +/// Lower an instruction input to a 64-bit constant, if possible. +fn input_matches_const>(ctx: &mut C, input: InsnInput) -> Option { + let input = ctx.get_input_as_source_or_const(input.insn, input.input); + input.constant +} + +/// Return false if instruction input cannot have the value Imm, true otherwise. +fn input_maybe_imm>(ctx: &mut C, input: InsnInput, imm: u64) -> bool { + if let Some(c) = input_matches_const(ctx, input) { + let ty = ctx.input_ty(input.insn, input.input); + let from_bits = ty_bits(ty) as u8; + let mask = if from_bits < 64 { + (1u64 << ty_bits(ty)) - 1 + } else { + 0xffff_ffff_ffff_ffff + }; + c & mask == imm & mask + } else { + true + } +} + +/// Lower an instruction input to a 16-bit signed constant, if possible. +fn input_matches_simm16>(ctx: &mut C, input: InsnInput) -> Option { + if let Some(imm_value) = input_matches_const(ctx, input) { + if let Ok(imm) = i16::try_from(imm_value as i64) { + return Some(imm); + } + } + None +} + +/// Lower an instruction input to a 32-bit signed constant, if possible. +fn input_matches_simm32>(ctx: &mut C, input: InsnInput) -> Option { + if let Some(imm_value) = input_matches_const(ctx, input) { + if let Ok(imm) = i32::try_from(imm_value as i64) { + return Some(imm); + } + } + None +} + +/// Lower an instruction input to a 32-bit unsigned constant, if possible. +fn input_matches_uimm32>(ctx: &mut C, input: InsnInput) -> Option { + if let Some(imm_value) = input_matches_const(ctx, input) { + if let Ok(imm) = u32::try_from(imm_value) { + return Some(imm); + } + } + None +} + +/// Lower a negated instruction input to a 16-bit signed constant, if possible. +fn negated_input_matches_simm16>( + ctx: &mut C, + input: InsnInput, +) -> Option { + if let Some(imm_value) = input_matches_const(ctx, input) { + if let Ok(imm) = i16::try_from(-(imm_value as i64)) { + return Some(imm); + } + } + None +} + +/// Lower a negated instruction input to a 32-bit signed constant, if possible. +fn negated_input_matches_simm32>( + ctx: &mut C, + input: InsnInput, +) -> Option { + if let Some(imm_value) = input_matches_const(ctx, input) { + if let Ok(imm) = i32::try_from(-(imm_value as i64)) { + return Some(imm); + } + } + None +} + +/// Lower an instruction input to a 16-bit shifted constant, if possible. +fn input_matches_uimm16shifted>( + ctx: &mut C, + input: InsnInput, +) -> Option { + if let Some(imm_value) = input_matches_const(ctx, input) { + return UImm16Shifted::maybe_from_u64(imm_value); + } + None +} + +/// Lower an instruction input to a 32-bit shifted constant, if possible. 
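+///
+/// ("Shifted" constants are those whose set bits fit in one aligned 16-bit or
+/// 32-bit field of the 64-bit value. For example, 0xdead_beef_0000_0000 fits
+/// the high 32-bit field (shift 1) and can be loaded with a single `llihf`.)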
+fn input_matches_uimm32shifted>( + ctx: &mut C, + input: InsnInput, +) -> Option { + if let Some(imm_value) = input_matches_const(ctx, input) { + return UImm32Shifted::maybe_from_u64(imm_value); + } + None +} + +/// Lower an instruction input to a 16-bit inverted shifted constant, if possible. +fn input_matches_uimm16shifted_inv>( + ctx: &mut C, + input: InsnInput, +) -> Option { + if let Some(imm_value) = input_matches_const(ctx, input) { + if let Some(imm) = UImm16Shifted::maybe_from_u64(!imm_value) { + return Some(imm.negate_bits()); + } + } + None +} + +/// Lower an instruction input to a 32-bit inverted shifted constant, if possible. +fn input_matches_uimm32shifted_inv>( + ctx: &mut C, + input: InsnInput, +) -> Option { + if let Some(imm_value) = input_matches_const(ctx, input) { + if let Some(imm) = UImm32Shifted::maybe_from_u64(!imm_value) { + return Some(imm.negate_bits()); + } + } + None +} + +/// Checks for an instance of `op` feeding the given input. +fn input_matches_insn>( + c: &mut C, + input: InsnInput, + op: Opcode, +) -> Option { + let inputs = c.get_input_as_source_or_const(input.insn, input.input); + if let Some((src_inst, _)) = inputs.inst { + let data = c.data(src_inst); + if data.opcode() == op { + return Some(src_inst); + } + } + None +} + +/// Checks for an instance of `op` feeding the given input, possibly via a conversion `conv` (e.g., +/// Bint or a bitcast). +fn input_matches_insn_via_conv>( + c: &mut C, + input: InsnInput, + op: Opcode, + conv: Opcode, +) -> Option { + let inputs = c.get_input_as_source_or_const(input.insn, input.input); + if let Some((src_inst, _)) = inputs.inst { + let data = c.data(src_inst); + if data.opcode() == op { + return Some(src_inst); + } + if data.opcode() == conv { + let inputs = c.get_input_as_source_or_const(src_inst, 0); + if let Some((src_inst, _)) = inputs.inst { + let data = c.data(src_inst); + if data.opcode() == op { + return Some(src_inst); + } + } + } + } + None +} + +fn input_matches_load_insn>( + ctx: &mut C, + input: InsnInput, + op: Opcode, +) -> Option { + if let Some(insn) = input_matches_insn(ctx, input, op) { + let inputs: SmallVec<[InsnInput; 4]> = (0..ctx.num_inputs(insn)) + .map(|i| InsnInput { insn, input: i }) + .collect(); + let off = ctx.data(insn).load_store_offset().unwrap(); + let flags = ctx.memflags(insn).unwrap(); + let endianness = flags.endianness(Endianness::Big); + if endianness == Endianness::Big { + let mem = lower_address(ctx, &inputs[..], off, flags); + ctx.sink_inst(insn); + return Some(mem); + } + } + None +} + +fn input_matches_mem>(ctx: &mut C, input: InsnInput) -> Option { + if ty_bits(ctx.input_ty(input.insn, input.input)) >= 32 { + return input_matches_load_insn(ctx, input, Opcode::Load); + } + None +} + +fn input_matches_sext16_mem>( + ctx: &mut C, + input: InsnInput, +) -> Option { + if ty_bits(ctx.input_ty(input.insn, input.input)) == 16 { + return input_matches_load_insn(ctx, input, Opcode::Load); + } + if ty_bits(ctx.input_ty(input.insn, input.input)) >= 32 { + return input_matches_load_insn(ctx, input, Opcode::Sload16); + } + None +} + +fn input_matches_sext32_mem>( + ctx: &mut C, + input: InsnInput, +) -> Option { + if ty_bits(ctx.input_ty(input.insn, input.input)) > 32 { + return input_matches_load_insn(ctx, input, Opcode::Sload32); + } + None +} + +fn input_matches_sext32_reg>(ctx: &mut C, input: InsnInput) -> Option { + if let Some(insn) = input_matches_insn(ctx, input, Opcode::Sextend) { + if ty_bits(ctx.input_ty(insn, 0)) == 32 { + let reg = put_input_in_reg(ctx, InsnInput 
{ insn, input: 0 }, NarrowValueMode::None); + return Some(reg); + } + } + None +} + +fn input_matches_uext32_reg>(ctx: &mut C, input: InsnInput) -> Option { + if let Some(insn) = input_matches_insn(ctx, input, Opcode::Uextend) { + if ty_bits(ctx.input_ty(insn, 0)) == 32 { + let reg = put_input_in_reg(ctx, InsnInput { insn, input: 0 }, NarrowValueMode::None); + return Some(reg); + } + } + None +} + +fn input_matches_uext16_mem>( + ctx: &mut C, + input: InsnInput, +) -> Option { + if ty_bits(ctx.input_ty(input.insn, input.input)) == 16 { + return input_matches_load_insn(ctx, input, Opcode::Load); + } + if ty_bits(ctx.input_ty(input.insn, input.input)) >= 32 { + return input_matches_load_insn(ctx, input, Opcode::Uload16); + } + None +} + +fn input_matches_uext32_mem>( + ctx: &mut C, + input: InsnInput, +) -> Option { + if ty_bits(ctx.input_ty(input.insn, input.input)) > 32 { + return input_matches_load_insn(ctx, input, Opcode::Uload32); + } + None +} + +//============================================================================ +// Lowering: force instruction input into a register + +/// How to handle narrow values loaded into registers; see note on `narrow_mode` +/// parameter to `put_input_in_*` below. +#[derive(Clone, Copy, Debug, PartialEq, Eq)] +enum NarrowValueMode { + None, + /// Zero-extend to 32 bits if original is < 32 bits. + ZeroExtend32, + /// Sign-extend to 32 bits if original is < 32 bits. + SignExtend32, + /// Zero-extend to 64 bits if original is < 64 bits. + ZeroExtend64, + /// Sign-extend to 64 bits if original is < 64 bits. + SignExtend64, +} + +fn extend_memory_to_reg>( + ctx: &mut C, + mem: MemArg, + from_ty: Type, + to_ty: Type, + signed: bool, +) -> Reg { + let rd = ctx.alloc_tmp(to_ty).only_reg().unwrap(); + ctx.emit(match (signed, ty_bits(to_ty), ty_bits(from_ty)) { + (false, 32, 8) => Inst::Load32ZExt8 { rd, mem }, + (false, 32, 16) => Inst::Load32ZExt16 { rd, mem }, + (true, 32, 8) => Inst::Load32SExt8 { rd, mem }, + (true, 32, 16) => Inst::Load32SExt16 { rd, mem }, + (false, 64, 8) => Inst::Load64ZExt8 { rd, mem }, + (false, 64, 16) => Inst::Load64ZExt16 { rd, mem }, + (false, 64, 32) => Inst::Load64ZExt32 { rd, mem }, + (true, 64, 8) => Inst::Load64SExt8 { rd, mem }, + (true, 64, 16) => Inst::Load64SExt16 { rd, mem }, + (true, 64, 32) => Inst::Load64SExt32 { rd, mem }, + _ => panic!("Unsupported size in load"), + }); + rd.to_reg() +} + +/// Sign-extend the low `from_bits` bits of `value` to a full u64. +fn sign_extend_to_u64(value: u64, from_bits: u8) -> u64 { + assert!(from_bits <= 64); + if from_bits >= 64 { + value + } else { + (((value << (64 - from_bits)) as i64) >> (64 - from_bits)) as u64 + } +} + +/// Zero-extend the low `from_bits` bits of `value` to a full u64. +fn zero_extend_to_u64(value: u64, from_bits: u8) -> u64 { + assert!(from_bits <= 64); + if from_bits >= 64 { + value + } else { + value & ((1u64 << from_bits) - 1) + } +} + +/// Lower an instruction input to a reg. +/// +/// The given register will be extended appropriately, according to +/// `narrow_mode` and the input's type. 
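+///
+/// For example, an I8 input requested with `NarrowValueMode::SignExtend32`
+/// is rematerialized directly if it is a constant, folded into the load
+/// (e.g. `lb`) if it comes from memory, and otherwise widened with an
+/// explicit `Inst::Extend` (`lbr`).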
+fn put_input_in_reg>( + ctx: &mut C, + input: InsnInput, + narrow_mode: NarrowValueMode, +) -> Reg { + let signed = match narrow_mode { + NarrowValueMode::SignExtend32 | NarrowValueMode::SignExtend64 => true, + NarrowValueMode::ZeroExtend32 | NarrowValueMode::ZeroExtend64 => false, + _ => false, + }; + let ty = ctx.input_ty(input.insn, input.input); + let from_bits = ty_bits(ty) as u8; + let ext_ty = match narrow_mode { + NarrowValueMode::None => ty, + NarrowValueMode::ZeroExtend32 | NarrowValueMode::SignExtend32 => types::I32, + NarrowValueMode::ZeroExtend64 | NarrowValueMode::SignExtend64 => types::I64, + }; + let to_bits = ty_bits(ext_ty) as u8; + assert!(to_bits >= from_bits); + + if let Some(c) = input_matches_const(ctx, input) { + let extended = if from_bits == to_bits { + c + } else if signed { + sign_extend_to_u64(c, from_bits) + } else { + zero_extend_to_u64(c, from_bits) + }; + let masked = zero_extend_to_u64(extended, to_bits); + + // Generate constants fresh at each use to minimize long-range register pressure. + let to_reg = ctx.alloc_tmp(ext_ty).only_reg().unwrap(); + for inst in Inst::gen_constant(ValueRegs::one(to_reg), masked as u128, ext_ty, |ty| { + ctx.alloc_tmp(ty).only_reg().unwrap() + }) + .into_iter() + { + ctx.emit(inst); + } + to_reg.to_reg() + } else if to_bits == from_bits { + ctx.put_input_in_regs(input.insn, input.input) + .only_reg() + .unwrap() + } else if let Some(mem) = input_matches_load_insn(ctx, input, Opcode::Load) { + extend_memory_to_reg(ctx, mem, ty, ext_ty, signed) + } else { + let rd = ctx.alloc_tmp(ext_ty).only_reg().unwrap(); + let rn = ctx + .put_input_in_regs(input.insn, input.input) + .only_reg() + .unwrap(); + ctx.emit(Inst::Extend { + rd, + rn, + signed, + from_bits, + to_bits, + }); + rd.to_reg() + } +} + +//============================================================================ +// Lowering: addressing mode support. Takes instruction directly, rather +// than an `InsnInput`, to do more introspection. + +/// Lower the address of a load or store. +fn lower_address>( + ctx: &mut C, + addends: &[InsnInput], + offset: i32, + flags: MemFlags, +) -> MemArg { + // Handle one reg and offset. + if addends.len() == 1 { + if offset == 0 { + if let Some(add) = input_matches_insn(ctx, addends[0], Opcode::Iadd) { + debug_assert_eq!(ctx.output_ty(add, 0), types::I64); + let add_inputs = &[ + InsnInput { + insn: add, + input: 0, + }, + InsnInput { + insn: add, + input: 1, + }, + ]; + + let ra = put_input_in_reg(ctx, add_inputs[0], NarrowValueMode::None); + let rb = put_input_in_reg(ctx, add_inputs[1], NarrowValueMode::None); + return MemArg::reg_plus_reg(ra, rb, flags); + } + } + + if let Some(symbol) = input_matches_insn(ctx, addends[0], Opcode::SymbolValue) { + let (extname, dist, ext_offset) = ctx.symbol_value(symbol).unwrap(); + let ext_offset = ext_offset + i64::from(offset); + if dist == RelocDistance::Near && (ext_offset & 1) == 0 { + if let Ok(offset) = i32::try_from(ext_offset) { + return MemArg::Symbol { + name: Box::new(extname.clone()), + offset, + flags, + }; + } + } + } + + let reg = put_input_in_reg(ctx, addends[0], NarrowValueMode::None); + return MemArg::reg_plus_off(reg, offset as i64, flags); + } + + // Handle two regs and a zero offset. + if addends.len() == 2 && offset == 0 { + let ra = put_input_in_reg(ctx, addends[0], NarrowValueMode::None); + let rb = put_input_in_reg(ctx, addends[1], NarrowValueMode::None); + return MemArg::reg_plus_reg(ra, rb, flags); + } + + // Otherwise, generate add instructions. 
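+    // Sketch of the sequence built here: load `offset` into a fresh temporary
+    // (e.g. via `lghi`/`lgfi`), then fold in each addend with a 64-bit
+    // register add (`agr`-style AluRRR), leaving the final address in `addr`.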
+    let addr = ctx.alloc_tmp(types::I64).only_reg().unwrap();
+
+    // Get the const into a reg.
+    lower_constant_u64(ctx, addr.clone(), offset as u64);
+
+    // Add each addend to the address.
+    for addend in addends {
+        let reg = put_input_in_reg(ctx, *addend, NarrowValueMode::None);
+
+        ctx.emit(Inst::AluRRR {
+            alu_op: ALUOp::Add64,
+            rd: addr.clone(),
+            rn: addr.to_reg(),
+            rm: reg.clone(),
+        });
+    }
+
+    MemArg::reg(addr.to_reg(), flags)
+}
+
+//============================================================================
+// Lowering: generating constants.
+
+fn lower_constant_u64<C: LowerCtx<I = Inst>>(ctx: &mut C, rd: Writable<Reg>, value: u64) {
+    for inst in Inst::load_constant64(rd, value) {
+        ctx.emit(inst);
+    }
+}
+
+fn lower_constant_u32<C: LowerCtx<I = Inst>>(ctx: &mut C, rd: Writable<Reg>, value: u32) {
+    for inst in Inst::load_constant32(rd, value) {
+        ctx.emit(inst);
+    }
+}
+
+fn lower_constant_f32<C: LowerCtx<I = Inst>>(ctx: &mut C, rd: Writable<Reg>, value: f32) {
+    ctx.emit(Inst::load_fp_constant32(rd, value));
+}
+
+fn lower_constant_f64<C: LowerCtx<I = Inst>>(ctx: &mut C, rd: Writable<Reg>, value: f64) {
+    ctx.emit(Inst::load_fp_constant64(rd, value));
+}
+
+//=============================================================================
+// Lowering: comparisons
+
+/// Determines whether this condcode interprets inputs as signed or
+/// unsigned. See the documentation for the `icmp` instruction in
+/// cranelift-codegen/meta/src/shared/instructions.rs for further insights
+/// into this.
+pub fn condcode_is_signed(cc: IntCC) -> bool {
+    match cc {
+        IntCC::Equal => false,
+        IntCC::NotEqual => false,
+        IntCC::SignedGreaterThanOrEqual => true,
+        IntCC::SignedGreaterThan => true,
+        IntCC::SignedLessThanOrEqual => true,
+        IntCC::SignedLessThan => true,
+        IntCC::UnsignedGreaterThanOrEqual => false,
+        IntCC::UnsignedGreaterThan => false,
+        IntCC::UnsignedLessThanOrEqual => false,
+        IntCC::UnsignedLessThan => false,
+        IntCC::Overflow => true,
+        IntCC::NotOverflow => true,
+    }
+}
+
+fn lower_icmp_to_flags<C: LowerCtx<I = Inst>>(
+    ctx: &mut C,
+    insn: IRInst,
+    is_signed: bool,
+    may_sink_memory: bool,
+) {
+    let ty = ctx.input_ty(insn, 0);
+    let bits = ty_bits(ty);
+    let narrow_mode = match (bits <= 32, is_signed) {
+        (true, true) => NarrowValueMode::SignExtend32,
+        (true, false) => NarrowValueMode::ZeroExtend32,
+        (false, true) => NarrowValueMode::SignExtend64,
+        (false, false) => NarrowValueMode::ZeroExtend64,
+    };
+    let inputs = [
+        InsnInput {
+            insn: insn,
+            input: 0,
+        },
+        InsnInput {
+            insn: insn,
+            input: 1,
+        },
+    ];
+    let ty = ctx.input_ty(insn, 0);
+    let rn = put_input_in_reg(ctx, inputs[0], narrow_mode);
+    if is_signed {
+        let op = choose_32_64(ty, CmpOp::CmpS32, CmpOp::CmpS64);
+        // Try matching immediate operand.
+        if let Some(imm) = input_matches_simm16(ctx, inputs[1]) {
+            return ctx.emit(Inst::CmpRSImm16 { op, rn, imm });
+        }
+        if let Some(imm) = input_matches_simm32(ctx, inputs[1]) {
+            return ctx.emit(Inst::CmpRSImm32 { op, rn, imm });
+        }
+        // If sinking memory loads is allowed, try matching memory operand.
+        if may_sink_memory {
+            if let Some(mem) = input_matches_mem(ctx, inputs[1]) {
+                return ctx.emit(Inst::CmpRX { op, rn, mem });
+            }
+            if let Some(mem) = input_matches_sext16_mem(ctx, inputs[1]) {
+                let op = choose_32_64(ty, CmpOp::CmpS32Ext16, CmpOp::CmpS64Ext16);
+                return ctx.emit(Inst::CmpRX { op, rn, mem });
+            }
+            if let Some(mem) = input_matches_sext32_mem(ctx, inputs[1]) {
+                return ctx.emit(Inst::CmpRX {
+                    op: CmpOp::CmpS64Ext32,
+                    rn,
+                    mem,
+                });
+            }
+        }
+        // Try matching sign-extension in register.
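+        // (The CmpS64Ext32 register form corresponds to `cgfr`, which
+        // sign-extends the 32-bit operand on the fly and so saves a
+        // separate `lgfr`.)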
+        if let Some(rm) = input_matches_sext32_reg(ctx, inputs[1]) {
+            return ctx.emit(Inst::CmpRR {
+                op: CmpOp::CmpS64Ext32,
+                rn,
+                rm,
+            });
+        }
+        // If no special case matched above, fall back to a register compare.
+        let rm = put_input_in_reg(ctx, inputs[1], narrow_mode);
+        return ctx.emit(Inst::CmpRR { op, rn, rm });
+    } else {
+        let op = choose_32_64(ty, CmpOp::CmpL32, CmpOp::CmpL64);
+        // Try matching immediate operand.
+        if let Some(imm) = input_matches_uimm32(ctx, inputs[1]) {
+            return ctx.emit(Inst::CmpRUImm32 { op, rn, imm });
+        }
+        // If sinking memory loads is allowed, try matching memory operand.
+        if may_sink_memory {
+            if let Some(mem) = input_matches_mem(ctx, inputs[1]) {
+                return ctx.emit(Inst::CmpRX { op, rn, mem });
+            }
+            if let Some(mem) = input_matches_uext16_mem(ctx, inputs[1]) {
+                match &mem {
+                    &MemArg::Symbol { .. } => {
+                        let op = choose_32_64(ty, CmpOp::CmpL32Ext16, CmpOp::CmpL64Ext16);
+                        return ctx.emit(Inst::CmpRX { op, rn, mem });
+                    }
+                    _ => {
+                        let reg_ty = choose_32_64(ty, types::I32, types::I64);
+                        let rm = extend_memory_to_reg(ctx, mem, ty, reg_ty, false);
+                        return ctx.emit(Inst::CmpRR { op, rn, rm });
+                    }
+                }
+            }
+            if let Some(mem) = input_matches_uext32_mem(ctx, inputs[1]) {
+                return ctx.emit(Inst::CmpRX {
+                    op: CmpOp::CmpL64Ext32,
+                    rn,
+                    mem,
+                });
+            }
+        }
+        // Try matching zero-extension in register.
+        if let Some(rm) = input_matches_uext32_reg(ctx, inputs[1]) {
+            return ctx.emit(Inst::CmpRR {
+                op: CmpOp::CmpL64Ext32,
+                rn,
+                rm,
+            });
+        }
+        // If no special case matched above, fall back to a register compare.
+        let rm = put_input_in_reg(ctx, inputs[1], narrow_mode);
+        return ctx.emit(Inst::CmpRR { op, rn, rm });
+    }
+}
+
+fn lower_fcmp_to_flags<C: LowerCtx<I = Inst>>(ctx: &mut C, insn: IRInst) {
+    let ty = ctx.input_ty(insn, 0);
+    let bits = ty_bits(ty);
+    let inputs = [
+        InsnInput {
+            insn: insn,
+            input: 0,
+        },
+        InsnInput {
+            insn: insn,
+            input: 1,
+        },
+    ];
+    let rn = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None);
+    let rm = put_input_in_reg(ctx, inputs[1], NarrowValueMode::None);
+    match bits {
+        32 => {
+            ctx.emit(Inst::FpuCmp32 { rn, rm });
+        }
+        64 => {
+            ctx.emit(Inst::FpuCmp64 { rn, rm });
+        }
+        _ => panic!("Unknown float size"),
+    }
+}
+
+fn lower_boolean_to_flags<C: LowerCtx<I = Inst>>(ctx: &mut C, input: InsnInput) -> Cond {
+    if let Some(icmp_insn) = input_matches_insn_via_conv(ctx, input, Opcode::Icmp, Opcode::Bint) {
+        // FIXME: If the Icmp (and Bint) only have a single use, we can still allow sinking memory
+        let may_sink_memory = false;
+        let condcode = ctx.data(icmp_insn).cond_code().unwrap();
+        let is_signed = condcode_is_signed(condcode);
+        lower_icmp_to_flags(ctx, icmp_insn, is_signed, may_sink_memory);
+        Cond::from_intcc(condcode)
+    } else if let Some(fcmp_insn) =
+        input_matches_insn_via_conv(ctx, input, Opcode::Fcmp, Opcode::Bint)
+    {
+        let condcode = ctx.data(fcmp_insn).fp_cond_code().unwrap();
+        lower_fcmp_to_flags(ctx, fcmp_insn);
+        Cond::from_floatcc(condcode)
+    } else {
+        let ty = ctx.input_ty(input.insn, input.input);
+        let narrow_mode = if ty.bits() < 32 {
+            NarrowValueMode::ZeroExtend32
+        } else {
+            NarrowValueMode::None
+        };
+        let rn = put_input_in_reg(ctx, input, narrow_mode);
+        let op = choose_32_64(ty, CmpOp::CmpS32, CmpOp::CmpS64);
+        ctx.emit(Inst::CmpRSImm16 { op, rn, imm: 0 });
+        Cond::from_intcc(IntCC::NotEqual)
+    }
+}
+
+fn lower_flags_to_bool_result<C: LowerCtx<I = Inst>>(
+    ctx: &mut C,
+    cond: Cond,
+    rd: Writable<Reg>,
+    ty: Type,
+) {
+    if ty_bits(ty) == 1 {
+        lower_constant_u32(ctx, rd, 0);
+        ctx.emit(Inst::CMov32SImm16 { rd, cond, imm: 1 });
+    } else if ty_bits(ty) < 64 {
+        lower_constant_u32(ctx, rd, 0);
+        ctx.emit(Inst::CMov32SImm16 { rd, cond, imm: -1 });
+    } else {
+        lower_constant_u64(ctx, rd, 0);
+        ctx.emit(Inst::CMov64SImm16 { rd, cond, imm: -1 });
+    }
+}
+
+//============================================================================
+// Lowering: main entry point for lowering an instruction
+
+fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
+    ctx: &mut C,
+    insn: IRInst,
+    flags: &Flags,
+) -> CodegenResult<()> {
+    let op = ctx.data(insn).opcode();
+    let inputs: SmallVec<[InsnInput; 4]> = (0..ctx.num_inputs(insn))
+        .map(|i| InsnInput { insn, input: i })
+        .collect();
+    let outputs: SmallVec<[InsnOutput; 2]> = (0..ctx.num_outputs(insn))
+        .map(|i| InsnOutput { insn, output: i })
+        .collect();
+    let ty = if outputs.len() > 0 {
+        Some(ctx.output_ty(insn, 0))
+    } else {
+        None
+    };
+
+    match op {
+        Opcode::Nop => {
+            // Nothing.
+        }
+
+        Opcode::Copy | Opcode::Ireduce | Opcode::Breduce => {
+            // Smaller ints / bools have the high bits undefined, so any reduce
+            // operation is simply a copy.
+            let rd = get_output_reg(ctx, outputs[0]).only_reg().unwrap();
+            let rn = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None);
+            let ty = ctx.input_ty(insn, 0);
+            ctx.emit(Inst::gen_move(rd, rn, ty));
+        }
+
+        Opcode::Iconst | Opcode::Bconst | Opcode::Null => {
+            let value = ctx.get_constant(insn).unwrap();
+            let rd = get_output_reg(ctx, outputs[0]).only_reg().unwrap();
+            let ty = ty.unwrap();
+            if ty.bits() <= 32 {
+                lower_constant_u32(ctx, rd, value as u32);
+            } else {
+                lower_constant_u64(ctx, rd, value);
+            }
+        }
+        Opcode::F32const => {
+            let value = f32::from_bits(ctx.get_constant(insn).unwrap() as u32);
+            let rd = get_output_reg(ctx, outputs[0]).only_reg().unwrap();
+            lower_constant_f32(ctx, rd, value);
+        }
+        Opcode::F64const => {
+            let value = f64::from_bits(ctx.get_constant(insn).unwrap());
+            let rd = get_output_reg(ctx, outputs[0]).only_reg().unwrap();
+            lower_constant_f64(ctx, rd, value);
+        }
+
+        Opcode::Iadd => {
+            let ty = ty.unwrap();
+            let alu_op = choose_32_64(ty, ALUOp::Add32, ALUOp::Add64);
+            let rd = get_output_reg(ctx, outputs[0]).only_reg().unwrap();
+            let rn = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None);
+            if let Some(imm) = input_matches_simm16(ctx, inputs[1]) {
+                ctx.emit(Inst::AluRRSImm16 {
+                    alu_op,
+                    rd,
+                    rn,
+                    imm,
+                });
+            } else if let Some(imm) = input_matches_simm32(ctx, inputs[1]) {
+                ctx.emit(Inst::gen_move(rd, rn, ty));
+                ctx.emit(Inst::AluRSImm32 { alu_op, rd, imm });
+            } else if let Some(mem) = input_matches_mem(ctx, inputs[1]) {
+                ctx.emit(Inst::gen_move(rd, rn, ty));
+                ctx.emit(Inst::AluRX { alu_op, rd, mem });
+            } else if let Some(mem) = input_matches_sext16_mem(ctx, inputs[1]) {
+                let alu_op = choose_32_64(ty, ALUOp::Add32Ext16, ALUOp::Add64Ext16);
+                ctx.emit(Inst::gen_move(rd, rn, ty));
+                ctx.emit(Inst::AluRX { alu_op, rd, mem });
+            } else if let Some(mem) = input_matches_sext32_mem(ctx, inputs[1]) {
+                ctx.emit(Inst::gen_move(rd, rn, ty));
+                ctx.emit(Inst::AluRX {
+                    alu_op: ALUOp::Add64Ext32,
+                    rd,
+                    mem,
+                });
+            } else if let Some(rm) = input_matches_sext32_reg(ctx, inputs[1]) {
+                ctx.emit(Inst::gen_move(rd, rn, ty));
+                ctx.emit(Inst::AluRR {
+                    alu_op: ALUOp::Add64Ext32,
+                    rd,
+                    rm,
+                });
+            } else {
+                let rm = put_input_in_reg(ctx, inputs[1], NarrowValueMode::None);
+                ctx.emit(Inst::AluRRR { alu_op, rd, rn, rm });
+            }
+        }
+        Opcode::Isub => {
+            let ty = ty.unwrap();
+            let alu_op = choose_32_64(ty, ALUOp::Sub32, ALUOp::Sub64);
+            let neg_op = choose_32_64(ty, ALUOp::Add32, ALUOp::Add64);
+            let rd =
get_output_reg(ctx, outputs[0]).only_reg().unwrap(); + let rn = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None); + if let Some(imm) = negated_input_matches_simm16(ctx, inputs[1]) { + ctx.emit(Inst::AluRRSImm16 { + alu_op: neg_op, + rd, + rn, + imm, + }); + } else if let Some(imm) = negated_input_matches_simm32(ctx, inputs[1]) { + ctx.emit(Inst::gen_move(rd, rn, ty)); + ctx.emit(Inst::AluRSImm32 { + alu_op: neg_op, + rd, + imm, + }); + } else if let Some(mem) = input_matches_mem(ctx, inputs[1]) { + ctx.emit(Inst::gen_move(rd, rn, ty)); + ctx.emit(Inst::AluRX { alu_op, rd, mem }); + } else if let Some(mem) = input_matches_sext16_mem(ctx, inputs[1]) { + let alu_op = choose_32_64(ty, ALUOp::Sub32Ext16, ALUOp::Sub64Ext16); + ctx.emit(Inst::gen_move(rd, rn, ty)); + ctx.emit(Inst::AluRX { alu_op, rd, mem }); + } else if let Some(mem) = input_matches_sext32_mem(ctx, inputs[1]) { + ctx.emit(Inst::gen_move(rd, rn, ty)); + ctx.emit(Inst::AluRX { + alu_op: ALUOp::Sub64Ext32, + rd, + mem, + }); + } else if let Some(rm) = input_matches_sext32_reg(ctx, inputs[1]) { + ctx.emit(Inst::gen_move(rd, rn, ty)); + ctx.emit(Inst::AluRR { + alu_op: ALUOp::Sub64Ext32, + rd, + rm, + }); + } else { + let rm = put_input_in_reg(ctx, inputs[1], NarrowValueMode::None); + ctx.emit(Inst::AluRRR { alu_op, rd, rn, rm }); + } + } + + Opcode::UaddSat | Opcode::SaddSat => unimplemented!(), + Opcode::UsubSat | Opcode::SsubSat => unimplemented!(), + + Opcode::Iabs => { + let ty = ty.unwrap(); + let op = choose_32_64(ty, UnaryOp::Abs32, UnaryOp::Abs64); + let rd = get_output_reg(ctx, outputs[0]).only_reg().unwrap(); + if let Some(rn) = input_matches_sext32_reg(ctx, inputs[0]) { + ctx.emit(Inst::UnaryRR { + op: UnaryOp::Abs64Ext32, + rd, + rn, + }); + } else { + let narrow_mode = if ty.bits() < 32 { + NarrowValueMode::SignExtend32 + } else { + NarrowValueMode::None + }; + let rn = put_input_in_reg(ctx, inputs[0], narrow_mode); + ctx.emit(Inst::UnaryRR { op, rd, rn }); + } + } + Opcode::Ineg => { + let ty = ty.unwrap(); + let op = choose_32_64(ty, UnaryOp::Neg32, UnaryOp::Neg64); + let rd = get_output_reg(ctx, outputs[0]).only_reg().unwrap(); + if let Some(rn) = input_matches_sext32_reg(ctx, inputs[0]) { + ctx.emit(Inst::UnaryRR { + op: UnaryOp::Neg64Ext32, + rd, + rn, + }); + } else { + let rn = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None); + ctx.emit(Inst::UnaryRR { op, rd, rn }); + } + } + + Opcode::Imul => { + let ty = ty.unwrap(); + let alu_op = choose_32_64(ty, ALUOp::Mul32, ALUOp::Mul64); + let rd = get_output_reg(ctx, outputs[0]).only_reg().unwrap(); + let rn = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None); + if let Some(imm) = input_matches_simm16(ctx, inputs[1]) { + ctx.emit(Inst::gen_move(rd, rn, ty)); + ctx.emit(Inst::AluRSImm16 { alu_op, rd, imm }); + } else if let Some(imm) = input_matches_simm32(ctx, inputs[1]) { + ctx.emit(Inst::gen_move(rd, rn, ty)); + ctx.emit(Inst::AluRSImm32 { alu_op, rd, imm }); + } else if let Some(mem) = input_matches_mem(ctx, inputs[1]) { + ctx.emit(Inst::gen_move(rd, rn, ty)); + ctx.emit(Inst::AluRX { alu_op, rd, mem }); + } else if let Some(mem) = input_matches_sext16_mem(ctx, inputs[1]) { + let alu_op = choose_32_64(ty, ALUOp::Mul32Ext16, ALUOp::Mul64Ext16); + ctx.emit(Inst::gen_move(rd, rn, ty)); + ctx.emit(Inst::AluRX { alu_op, rd, mem }); + } else if let Some(mem) = input_matches_sext32_mem(ctx, inputs[1]) { + ctx.emit(Inst::gen_move(rd, rn, ty)); + ctx.emit(Inst::AluRX { + alu_op: ALUOp::Mul64Ext32, + rd, + mem, + }); + } else if let Some(rm) = 
input_matches_sext32_reg(ctx, inputs[1]) { + ctx.emit(Inst::gen_move(rd, rn, ty)); + ctx.emit(Inst::AluRR { + alu_op: ALUOp::Mul64Ext32, + rd, + rm, + }); + } else { + let rm = put_input_in_reg(ctx, inputs[1], NarrowValueMode::None); + ctx.emit(Inst::AluRRR { alu_op, rd, rn, rm }); + } + } + + Opcode::Umulhi | Opcode::Smulhi => { + let rd = get_output_reg(ctx, outputs[0]).only_reg().unwrap(); + let is_signed = op == Opcode::Smulhi; + let input_ty = ctx.input_ty(insn, 0); + assert!(ctx.input_ty(insn, 1) == input_ty); + assert!(ctx.output_ty(insn, 0) == input_ty); + + match input_ty { + types::I64 => { + let rn = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None); + let rm = put_input_in_reg(ctx, inputs[1], NarrowValueMode::None); + + if is_signed { + ctx.emit(Inst::SMulWide { rn, rm }); + ctx.emit(Inst::gen_move(rd, gpr(0), input_ty)); + } else { + ctx.emit(Inst::gen_move(writable_gpr(1), rm, input_ty)); + ctx.emit(Inst::UMulWide { rn }); + ctx.emit(Inst::gen_move(rd, gpr(0), input_ty)); + } + } + types::I32 => { + let narrow_mode = if is_signed { + NarrowValueMode::SignExtend64 + } else { + NarrowValueMode::ZeroExtend64 + }; + let rn = put_input_in_reg(ctx, inputs[0], narrow_mode); + let rm = put_input_in_reg(ctx, inputs[1], narrow_mode); + ctx.emit(Inst::AluRRR { + alu_op: ALUOp::Mul64, + rd, + rn, + rm, + }); + let shift_op = if is_signed { + ShiftOp::AShR64 + } else { + ShiftOp::LShR64 + }; + ctx.emit(Inst::ShiftRR { + shift_op, + rd, + rn: rd.to_reg(), + shift_imm: SImm20::maybe_from_i64(32).unwrap(), + shift_reg: None, + }); + } + types::I16 | types::I8 => { + let narrow_mode = if is_signed { + NarrowValueMode::SignExtend32 + } else { + NarrowValueMode::ZeroExtend32 + }; + let rn = put_input_in_reg(ctx, inputs[0], narrow_mode); + let rm = put_input_in_reg(ctx, inputs[1], narrow_mode); + ctx.emit(Inst::AluRRR { + alu_op: ALUOp::Mul32, + rd, + rn, + rm, + }); + let shift_op = if is_signed { + ShiftOp::AShR32 + } else { + ShiftOp::LShR32 + }; + let shift_amt = match input_ty { + types::I16 => 16, + types::I8 => 8, + _ => unreachable!(), + }; + ctx.emit(Inst::ShiftRR { + shift_op, + rd, + rn: rd.to_reg(), + shift_imm: SImm20::maybe_from_i64(shift_amt).unwrap(), + shift_reg: None, + }); + } + _ => { + panic!("Unsupported argument type for umulhi/smulhi: {}", input_ty); + } + } + } + + Opcode::Udiv | Opcode::Urem => { + let rd = get_output_reg(ctx, outputs[0]).only_reg().unwrap(); + let ty = ty.unwrap(); + + let rn = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None); + if ty_bits(ty) <= 32 { + lower_constant_u32(ctx, writable_gpr(0), 0); + if ty_bits(ty) < 32 { + ctx.emit(Inst::Extend { + rd: writable_gpr(1), + rn, + signed: false, + from_bits: ty_bits(ty) as u8, + to_bits: 32, + }); + } else { + ctx.emit(Inst::mov32(writable_gpr(1), rn)); + } + } else { + lower_constant_u64(ctx, writable_gpr(0), 0); + ctx.emit(Inst::mov64(writable_gpr(1), rn)); + } + + let narrow_mode = if ty.bits() < 32 { + NarrowValueMode::ZeroExtend32 + } else { + NarrowValueMode::None + }; + let rm = put_input_in_reg(ctx, inputs[1], narrow_mode); + + if input_maybe_imm(ctx, inputs[1], 0) && flags.avoid_div_traps() { + ctx.emit(Inst::CmpTrapRSImm16 { + op: choose_32_64(ty, CmpOp::CmpS32, CmpOp::CmpS64), + rn: rm, + imm: 0, + cond: Cond::from_intcc(IntCC::Equal), + trap_code: TrapCode::IntegerDivisionByZero, + }); + } + + if ty_bits(ty) <= 32 { + ctx.emit(Inst::UDivMod32 { rn: rm }); + } else { + ctx.emit(Inst::UDivMod64 { rn: rm }); + } + + if op == Opcode::Udiv { + ctx.emit(Inst::gen_move(rd, gpr(1), ty)); + 
} else { + ctx.emit(Inst::gen_move(rd, gpr(0), ty)); + } + } + + Opcode::Sdiv | Opcode::Srem => { + let rd = get_output_reg(ctx, outputs[0]).only_reg().unwrap(); + let ty = ty.unwrap(); + + let rn = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None); + if ty_bits(ty) < 64 { + ctx.emit(Inst::Extend { + rd: writable_gpr(1), + rn, + signed: true, + from_bits: ty_bits(ty) as u8, + to_bits: 64, + }); + } else { + ctx.emit(Inst::mov64(writable_gpr(1), rn)); + } + + let narrow_mode = if ty.bits() < 32 { + NarrowValueMode::SignExtend32 + } else { + NarrowValueMode::None + }; + let rm = put_input_in_reg(ctx, inputs[1], narrow_mode); + + if input_maybe_imm(ctx, inputs[1], 0) && flags.avoid_div_traps() { + ctx.emit(Inst::CmpTrapRSImm16 { + op: choose_32_64(ty, CmpOp::CmpS32, CmpOp::CmpS64), + rn: rm, + imm: 0, + cond: Cond::from_intcc(IntCC::Equal), + trap_code: TrapCode::IntegerDivisionByZero, + }); + } + + if input_maybe_imm(ctx, inputs[1], 0xffff_ffff_ffff_ffff) { + if op == Opcode::Sdiv { + let tmp = ctx.alloc_tmp(ty).only_reg().unwrap(); + if ty_bits(ty) <= 32 { + lower_constant_u32(ctx, tmp, (1 << (ty_bits(ty) - 1)) - 1); + } else { + lower_constant_u64(ctx, tmp, (1 << (ty_bits(ty) - 1)) - 1); + } + ctx.emit(Inst::AluRRR { + alu_op: choose_32_64(ty, ALUOp::Xor32, ALUOp::Xor64), + rd: tmp, + rn: tmp.to_reg(), + rm: gpr(1), + }); + ctx.emit(Inst::AluRRR { + alu_op: choose_32_64(ty, ALUOp::And32, ALUOp::And64), + rd: tmp, + rn: tmp.to_reg(), + rm, + }); + ctx.emit(Inst::CmpTrapRSImm16 { + op: choose_32_64(ty, CmpOp::CmpS32, CmpOp::CmpS64), + rn: tmp.to_reg(), + imm: -1, + cond: Cond::from_intcc(IntCC::Equal), + trap_code: TrapCode::IntegerOverflow, + }); + } else { + if ty_bits(ty) > 32 { + ctx.emit(Inst::CmpRSImm16 { + op: CmpOp::CmpS64, + rn: rm, + imm: -1, + }); + ctx.emit(Inst::CMov64SImm16 { + rd: writable_gpr(1), + cond: Cond::from_intcc(IntCC::Equal), + imm: 0, + }); + } + } + } + + if ty_bits(ty) <= 32 { + ctx.emit(Inst::SDivMod32 { rn: rm }); + } else { + ctx.emit(Inst::SDivMod64 { rn: rm }); + } + + if op == Opcode::Sdiv { + ctx.emit(Inst::gen_move(rd, gpr(1), ty)); + } else { + ctx.emit(Inst::gen_move(rd, gpr(0), ty)); + } + } + + Opcode::Uextend | Opcode::Sextend => { + let ty = ty.unwrap(); + let to_bits = ty_bits(ty) as u8; + let to_bits = std::cmp::max(32, to_bits); + let narrow_mode = match (op, to_bits) { + (Opcode::Uextend, 32) => NarrowValueMode::ZeroExtend32, + (Opcode::Uextend, 64) => NarrowValueMode::ZeroExtend64, + (Opcode::Sextend, 32) => NarrowValueMode::SignExtend32, + (Opcode::Sextend, 64) => NarrowValueMode::SignExtend64, + _ => unreachable!(), + }; + let rn = put_input_in_reg(ctx, inputs[0], narrow_mode); + let rd = get_output_reg(ctx, outputs[0]).only_reg().unwrap(); + ctx.emit(Inst::gen_move(rd, rn, ty)); + } + + Opcode::Ishl | Opcode::Ushr | Opcode::Sshr => { + let ty = ty.unwrap(); + let size = ty_bits(ty); + let narrow_mode = match (op, size) { + (Opcode::Ishl, _) => NarrowValueMode::None, + (Opcode::Ushr, 64) => NarrowValueMode::ZeroExtend64, + (Opcode::Ushr, _) => NarrowValueMode::ZeroExtend32, + (Opcode::Sshr, 64) => NarrowValueMode::SignExtend64, + (Opcode::Sshr, _) => NarrowValueMode::SignExtend32, + _ => unreachable!(), + }; + let shift_op = match op { + Opcode::Ishl => choose_32_64(ty, ShiftOp::LShL32, ShiftOp::LShL64), + Opcode::Ushr => choose_32_64(ty, ShiftOp::LShR32, ShiftOp::LShR64), + Opcode::Sshr => choose_32_64(ty, ShiftOp::AShR32, ShiftOp::AShR64), + _ => unreachable!(), + }; + let rd = get_output_reg(ctx, outputs[0]).only_reg().unwrap(); + 
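            // Note: CLIF shift amounts are taken modulo the operand width, while
+            // s390x register-specified shifts consume the low 6 bits of the
+            // amount, so for widths below 64 the amount is masked with 31 below.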
let rn = put_input_in_reg(ctx, inputs[0], narrow_mode);
+            if let Some(imm) = input_matches_const(ctx, inputs[1]) {
+                let imm = imm & if size < 64 { 31 } else { 63 };
+                let shift_imm = SImm20::maybe_from_i64(imm as i64).unwrap();
+                let shift_reg = None;
+                ctx.emit(Inst::ShiftRR {
+                    shift_op,
+                    rd,
+                    rn,
+                    shift_imm,
+                    shift_reg,
+                });
+            } else {
+                let rm = put_input_in_reg(ctx, inputs[1], NarrowValueMode::None);
+                let shift_imm = SImm20::zero();
+                let shift_reg = if size < 64 {
+                    let tmp = ctx.alloc_tmp(types::I64).only_reg().unwrap();
+                    ctx.emit(Inst::gen_move(tmp, rm, types::I64));
+                    ctx.emit(Inst::AluRUImm16Shifted {
+                        alu_op: ALUOp::And64,
+                        rd: tmp,
+                        imm: UImm16Shifted::maybe_from_u64(31).unwrap(),
+                    });
+                    Some(tmp.to_reg())
+                } else {
+                    Some(rm)
+                };
+                ctx.emit(Inst::ShiftRR {
+                    shift_op,
+                    rd,
+                    rn,
+                    shift_imm,
+                    shift_reg,
+                });
+            }
+        }
+
+        Opcode::Rotr | Opcode::Rotl => {
+            // s390x doesn't have a right-rotate instruction, but a right rotation of K places
+            // is effectively a left rotation of N - K places, if N is the integer's bit size.
+            // We implement right rotations with this trick.
+            //
+            // For a 32-bit or 64-bit rotate, we can use the rotate-left instruction directly,
+            // negating the amount for a right rotation (the instruction implicitly masks the
+            // rotate amount).
+            //
+            // For a < 32-bit rotate, we synthesize this as:
+            //
+            //    rotr rd, rn, rm
+            //
+            //       =>
+            //
+            //    zero-extend rn, <32-or-64>
+            //    and tmp1, rm, <bitwidth - 1>
+            //    neg tmp2, rm
+            //    and tmp2, tmp2, <bitwidth - 1>
+            //    lshl tmp2, rn, tmp2
+            //    lshr tmp1, rn, tmp1
+            //    or rd, tmp1, tmp2
+            //
+            // For a constant amount, we can instead do:
+            //
+            //    zero-extend rn, <32-or-64>
+            //    lshl tmp1, rn, <bitwidth - shiftimm>
+            //    lshr tmp2, rn, <shiftimm>
+            //    or rd, tmp1, tmp2
+
+            let is_rotr = op == Opcode::Rotr;
+
+            let ty = ty.unwrap();
+            let ty_bits_size = ty_bits(ty) as u64;
+
+            let rd = get_output_reg(ctx, outputs[0]).only_reg().unwrap();
+            let rn = put_input_in_reg(
+                ctx,
+                inputs[0],
+                if ty_bits_size <= 32 {
+                    NarrowValueMode::ZeroExtend32
+                } else {
+                    NarrowValueMode::ZeroExtend64
+                },
+            );
+
+            if ty_bits_size == 32 || ty_bits_size == 64 {
+                let shift_op = choose_32_64(ty, ShiftOp::RotL32, ShiftOp::RotL64);
+                if let Some(imm) = input_matches_const(ctx, inputs[1]) {
+                    let shiftcount = imm & (ty_bits_size - 1);
+                    let shiftcount = if is_rotr {
+                        ty_bits_size - shiftcount
+                    } else {
+                        shiftcount
+                    };
+                    ctx.emit(Inst::ShiftRR {
+                        shift_op,
+                        rd,
+                        rn,
+                        shift_imm: SImm20::maybe_from_i64(shiftcount as i64).unwrap(),
+                        shift_reg: None,
+                    });
+                } else {
+                    let rm = put_input_in_reg(ctx, inputs[1], NarrowValueMode::None);
+                    let rm = if is_rotr {
+                        // Really ty_bits_size - rm, but the upper bits of the result are
+                        // ignored (because of the implicit masking done by the instruction),
+                        // so this is equivalent to negating the input.
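+                        // E.g. for a 64-bit rotr by 13 the neg produces -13,
+                        // which the rotate instruction masks to 51 == 64 - 13,
+                        // and a rotl by 51 is exactly a rotr by 13.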
+                        let op = choose_32_64(ty, UnaryOp::Neg32, UnaryOp::Neg64);
+                        let tmp = ctx.alloc_tmp(ty).only_reg().unwrap();
+                        ctx.emit(Inst::UnaryRR {
+                            op,
+                            rd: tmp,
+                            rn: rm,
+                        });
+                        tmp.to_reg()
+                    } else {
+                        rm
+                    };
+                    ctx.emit(Inst::ShiftRR {
+                        shift_op,
+                        rd,
+                        rn,
+                        shift_imm: SImm20::zero(),
+                        shift_reg: Some(rm),
+                    });
+                }
+            } else {
+                debug_assert!(ty_bits_size < 32);
+
+                if let Some(imm) = input_matches_const(ctx, inputs[1]) {
+                    let rot_count = imm & (ty_bits_size - 1);
+                    let (lshl_count, lshr_count) = if is_rotr {
+                        (ty_bits_size - rot_count, rot_count)
+                    } else {
+                        (rot_count, ty_bits_size - rot_count)
+                    };
+
+                    let tmp1 = ctx.alloc_tmp(types::I32).only_reg().unwrap();
+                    ctx.emit(Inst::ShiftRR {
+                        shift_op: ShiftOp::LShL32,
+                        rd: tmp1,
+                        rn,
+                        shift_imm: SImm20::maybe_from_i64(lshl_count as i64).unwrap(),
+                        shift_reg: None,
+                    });
+
+                    let tmp2 = ctx.alloc_tmp(types::I32).only_reg().unwrap();
+                    ctx.emit(Inst::ShiftRR {
+                        shift_op: ShiftOp::LShR32,
+                        rd: tmp2,
+                        rn,
+                        shift_imm: SImm20::maybe_from_i64(lshr_count as i64).unwrap(),
+                        shift_reg: None,
+                    });
+
+                    ctx.emit(Inst::AluRRR {
+                        alu_op: ALUOp::Orr32,
+                        rd,
+                        rn: tmp1.to_reg(),
+                        rm: tmp2.to_reg(),
+                    });
+                } else {
+                    let rm = put_input_in_reg(ctx, inputs[1], NarrowValueMode::None);
+                    let tmp1 = ctx.alloc_tmp(types::I32).only_reg().unwrap();
+                    let tmp2 = ctx.alloc_tmp(types::I32).only_reg().unwrap();
+
+                    ctx.emit(Inst::mov32(tmp1, rm));
+                    ctx.emit(Inst::UnaryRR {
+                        op: UnaryOp::Neg32,
+                        rd: tmp2,
+                        rn: rm,
+                    });
+
+                    ctx.emit(Inst::AluRUImm16Shifted {
+                        alu_op: ALUOp::And32,
+                        rd: tmp1,
+                        imm: UImm16Shifted::maybe_from_u64(ty_bits_size - 1).unwrap(),
+                    });
+                    ctx.emit(Inst::AluRUImm16Shifted {
+                        alu_op: ALUOp::And32,
+                        rd: tmp2,
+                        imm: UImm16Shifted::maybe_from_u64(ty_bits_size - 1).unwrap(),
+                    });
+
+                    // tmp1 holds the masked rotate amount, tmp2 its negation;
+                    // match the (lshl_count, lshr_count) choice of the
+                    // constant-amount case above.
+                    let (lshl, lshr) = if is_rotr { (tmp2, tmp1) } else { (tmp1, tmp2) };
+
+                    ctx.emit(Inst::ShiftRR {
+                        shift_op: ShiftOp::LShL32,
+                        rd: lshl,
+                        rn,
+                        shift_imm: SImm20::zero(),
+                        shift_reg: Some(lshl.to_reg()),
+                    });
+
+                    ctx.emit(Inst::ShiftRR {
+                        shift_op: ShiftOp::LShR32,
+                        rd: lshr,
+                        rn,
+                        shift_imm: SImm20::zero(),
+                        shift_reg: Some(lshr.to_reg()),
+                    });
+
+                    ctx.emit(Inst::AluRRR {
+                        alu_op: ALUOp::Orr32,
+                        rd,
+                        rn: lshl.to_reg(),
+                        rm: lshr.to_reg(),
+                    });
+                }
+            }
+        }
+
+        Opcode::Bnot => {
+            // There is no plain "not" instruction and no zero register, so
+            // compute !rn as rn ^ -1, materializing the all-ones constant
+            // in a temporary.
+            let ty = ty.unwrap();
+            let alu_op = choose_32_64(ty, ALUOp::Xor32, ALUOp::Xor64);
+            let rd = get_output_reg(ctx, outputs[0]).only_reg().unwrap();
+            let rn = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None);
+            let tmp = ctx.alloc_tmp(ty).only_reg().unwrap();
+            if ty_bits(ty) <= 32 {
+                lower_constant_u32(ctx, tmp, 0xffff_ffff);
+            } else {
+                lower_constant_u64(ctx, tmp, 0xffff_ffff_ffff_ffff);
+            }
+            ctx.emit(Inst::AluRRR {
+                alu_op,
+                rd,
+                rn,
+                rm: tmp.to_reg(),
+            });
+        }
+
+        Opcode::Band => {
+            let ty = ty.unwrap();
+            let alu_op = choose_32_64(ty, ALUOp::And32, ALUOp::And64);
+            let rd = get_output_reg(ctx, outputs[0]).only_reg().unwrap();
+            let rn = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None);
+            if let Some(imm) = input_matches_uimm16shifted_inv(ctx, inputs[1]) {
+                ctx.emit(Inst::gen_move(rd, rn, ty));
+                ctx.emit(Inst::AluRUImm16Shifted { alu_op, rd, imm });
+            } else if let Some(imm) = input_matches_uimm32shifted_inv(ctx, inputs[1]) {
+                ctx.emit(Inst::gen_move(rd, rn, ty));
+                ctx.emit(Inst::AluRUImm32Shifted { alu_op, rd, imm });
+            } else if let Some(mem) = input_matches_mem(ctx, inputs[1]) {
+                ctx.emit(Inst::gen_move(rd, rn, ty));
+                ctx.emit(Inst::AluRX { alu_op, rd, mem });
+            } else {
+                let rm = put_input_in_reg(ctx, inputs[1], NarrowValueMode::None);
+                ctx.emit(Inst::AluRRR { alu_op, rd, rn, rm });
+            }
+        }
+
+        Opcode::Bor => {
+            let ty = ty.unwrap();
+            let alu_op = choose_32_64(ty, ALUOp::Orr32, ALUOp::Orr64);
+            let rd =
get_output_reg(ctx, outputs[0]).only_reg().unwrap(); + let rn = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None); + if let Some(imm) = input_matches_uimm16shifted(ctx, inputs[1]) { + ctx.emit(Inst::gen_move(rd, rn, ty)); + ctx.emit(Inst::AluRUImm16Shifted { alu_op, rd, imm }); + } else if let Some(imm) = input_matches_uimm32shifted(ctx, inputs[1]) { + ctx.emit(Inst::gen_move(rd, rn, ty)); + ctx.emit(Inst::AluRUImm32Shifted { alu_op, rd, imm }); + } else if let Some(mem) = input_matches_mem(ctx, inputs[1]) { + ctx.emit(Inst::gen_move(rd, rn, ty)); + ctx.emit(Inst::AluRX { alu_op, rd, mem }); + } else { + let rm = put_input_in_reg(ctx, inputs[1], NarrowValueMode::None); + ctx.emit(Inst::AluRRR { alu_op, rd, rn, rm }); + } + } + + Opcode::Bxor => { + let ty = ty.unwrap(); + let alu_op = choose_32_64(ty, ALUOp::Xor32, ALUOp::Xor64); + let rd = get_output_reg(ctx, outputs[0]).only_reg().unwrap(); + let rn = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None); + if let Some(imm) = input_matches_uimm32shifted(ctx, inputs[1]) { + ctx.emit(Inst::gen_move(rd, rn, ty)); + ctx.emit(Inst::AluRUImm32Shifted { alu_op, rd, imm }); + } else if let Some(mem) = input_matches_mem(ctx, inputs[1]) { + ctx.emit(Inst::gen_move(rd, rn, ty)); + ctx.emit(Inst::AluRX { alu_op, rd, mem }); + } else { + let rm = put_input_in_reg(ctx, inputs[1], NarrowValueMode::None); + ctx.emit(Inst::AluRRR { alu_op, rd, rn, rm }); + } + } + + Opcode::BandNot | Opcode::BorNot | Opcode::BxorNot => { + let ty = ty.unwrap(); + let alu_op = match op { + Opcode::BandNot => choose_32_64(ty, ALUOp::AndNot32, ALUOp::AndNot64), + Opcode::BorNot => choose_32_64(ty, ALUOp::OrrNot32, ALUOp::OrrNot64), + Opcode::BxorNot => choose_32_64(ty, ALUOp::XorNot32, ALUOp::XorNot64), + _ => unreachable!(), + }; + let rd = get_output_reg(ctx, outputs[0]).only_reg().unwrap(); + let rn = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None); + let rm = put_input_in_reg(ctx, inputs[1], NarrowValueMode::None); + ctx.emit(Inst::AluRRR { alu_op, rd, rn, rm }); + } + + Opcode::Bitselect => { + let ty = ty.unwrap(); + let tmp = ctx.alloc_tmp(types::I64).only_reg().unwrap(); + let rd = get_output_reg(ctx, outputs[0]).only_reg().unwrap(); + let rcond = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None); + let rn = put_input_in_reg(ctx, inputs[1], NarrowValueMode::None); + let rm = put_input_in_reg(ctx, inputs[2], NarrowValueMode::None); + ctx.emit(Inst::AluRRR { + alu_op: choose_32_64(ty, ALUOp::And32, ALUOp::And64), + rd: tmp, + rn, + rm: rcond, + }); + ctx.emit(Inst::AluRRR { + alu_op: choose_32_64(ty, ALUOp::AndNot32, ALUOp::AndNot64), + rd, + rn: rm, + rm: rcond, + }); + ctx.emit(Inst::AluRRR { + alu_op: choose_32_64(ty, ALUOp::Orr32, ALUOp::Orr64), + rd, + rn: rd.to_reg(), + rm: tmp.to_reg(), + }); + } + + Opcode::Bextend | Opcode::Bmask => { + // Bextend and Bmask both simply sign-extend. This works for: + // - Bextend, because booleans are stored as 0 / -1, so we + // sign-extend the -1 to a -1 in the wider width. + // - Bmask, because the resulting integer mask value must be + // all-ones (-1) if the argument is true. + // + // For a sign-extension from a 1-bit value (Case 1 below), we need + // to do things a bit specially, because the ISA does not have a + // 1-to-N-bit sign extension instruction. For 8-bit or wider + // sources (Case 2 below), we do a sign extension normally. 
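+            // E.g. for Case 1, a b1 "true" is stored as 1: shifting it left
+            // by 31 gives 0x8000_0000, and an arithmetic shift right by 31
+            // then yields 0xffff_ffff == -1, as required.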
+ + let from_ty = ctx.input_ty(insn, 0); + let to_ty = ctx.output_ty(insn, 0); + let from_bits = ty_bits(from_ty); + let to_bits = ty_bits(to_ty); + + assert!( + from_bits <= 64 && to_bits <= 64, + "Vector Bextend not supported yet" + ); + + if from_bits >= to_bits { + // Just a move. + let rd = get_output_reg(ctx, outputs[0]).only_reg().unwrap(); + let rn = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None); + let ty = ctx.input_ty(insn, 0); + ctx.emit(Inst::gen_move(rd, rn, ty)); + } else if from_bits == 1 { + assert!(to_bits >= 8); + // Case 1: 1-bit to N-bit extension: use a shift-left / + // shift-right sequence to create a 0 / -1 result. + let rd = get_output_reg(ctx, outputs[0]).only_reg().unwrap(); + let rn = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None); + let shl_op = choose_32_64(to_ty, ShiftOp::LShL32, ShiftOp::LShL64); + let shr_op = choose_32_64(to_ty, ShiftOp::AShR32, ShiftOp::AShR64); + let count = if to_bits > 32 { 63 } else { 31 }; + ctx.emit(Inst::ShiftRR { + shift_op: shl_op, + rd, + rn, + shift_imm: SImm20::maybe_from_i64(count.into()).unwrap(), + shift_reg: None, + }); + ctx.emit(Inst::ShiftRR { + shift_op: shr_op, + rd, + rn: rd.to_reg(), + shift_imm: SImm20::maybe_from_i64(count.into()).unwrap(), + shift_reg: None, + }); + } else { + // Case 2: 8-or-more-bit to N-bit extension: just sign-extend. A + // `true` (all ones, or `-1`) will be extended to -1 with the + // larger width. + assert!(from_bits >= 8); + let narrow_mode = if to_bits == 64 { + NarrowValueMode::SignExtend64 + } else { + assert!(to_bits <= 32); + NarrowValueMode::SignExtend32 + }; + let rn = put_input_in_reg(ctx, inputs[0], narrow_mode); + let rd = get_output_reg(ctx, outputs[0]).only_reg().unwrap(); + ctx.emit(Inst::gen_move(rd, rn, to_ty)); + } + } + + Opcode::Bint => { + // Booleans are stored as all-zeroes (0) or all-ones (-1). We AND + // out the LSB to give a 0 / 1-valued integer result. 
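+            // E.g. a "true" b8 value is 0xff, and 0xff & 1 == 1, while a
+            // "false" value is 0x00, giving 0.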
+
+            let ty = ty.unwrap();
+            let rd = get_output_reg(ctx, outputs[0]).only_reg().unwrap();
+            let rn = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None);
+            if ty_bits(ty) <= 16 {
+                ctx.emit(Inst::gen_move(rd, rn, ty));
+                ctx.emit(Inst::AluRUImm16Shifted {
+                    alu_op: ALUOp::And32,
+                    rd,
+                    imm: UImm16Shifted::maybe_from_u64(1).unwrap(),
+                });
+            } else if ty_bits(ty) <= 32 {
+                ctx.emit(Inst::gen_move(rd, rn, ty));
+                ctx.emit(Inst::AluRUImm32Shifted {
+                    alu_op: ALUOp::And32,
+                    rd,
+                    imm: UImm32Shifted::maybe_from_u64(1).unwrap(),
+                });
+            } else {
+                let tmp = ctx.alloc_tmp(types::I64).only_reg().unwrap();
+                lower_constant_u64(ctx, tmp, 1);
+                ctx.emit(Inst::AluRRR {
+                    alu_op: ALUOp::And64,
+                    rd,
+                    rn,
+                    rm: tmp.to_reg(),
+                });
+            }
+        }
+
+        Opcode::Clz => {
+            let rd = get_output_reg(ctx, outputs[0]).only_reg().unwrap();
+            let rn = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None);
+            let ty = ty.unwrap();
+            let ty_bits_size = ty_bits(ty);
+
+            let rn = if ty_bits_size < 64 {
+                let tmp = ctx.alloc_tmp(types::I64).only_reg().unwrap();
+                ctx.emit(Inst::Extend {
+                    rd: tmp,
+                    rn,
+                    signed: false,
+                    from_bits: ty_bits_size as u8,
+                    to_bits: 64,
+                });
+                tmp.to_reg()
+            } else {
+                rn
+            };
+
+            ctx.emit(Inst::Flogr { rn });
+            ctx.emit(Inst::gen_move(rd, gpr(0), ty));
+
+            if ty_bits_size < 64 {
+                ctx.emit(Inst::AluRSImm16 {
+                    alu_op: ALUOp::Add32,
+                    rd,
+                    imm: -(64 - ty_bits_size as i16),
+                });
+            }
+        }
+
+        Opcode::Cls => {
+            let rd = get_output_reg(ctx, outputs[0]).only_reg().unwrap();
+            let rn = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None);
+            let ty = ty.unwrap();
+            let ty_bits_size = ty_bits(ty);
+
+            let rn = if ty_bits_size < 64 {
+                let tmp = ctx.alloc_tmp(types::I64).only_reg().unwrap();
+                ctx.emit(Inst::Extend {
+                    rd: tmp,
+                    rn,
+                    signed: true,
+                    from_bits: ty_bits_size as u8,
+                    to_bits: 64,
+                });
+                tmp.to_reg()
+            } else {
+                rn
+            };
+
+            // tmp = rn ^ ((signed)rn >> 63)
+            let tmp = ctx.alloc_tmp(types::I64).only_reg().unwrap();
+            ctx.emit(Inst::ShiftRR {
+                shift_op: ShiftOp::AShR64,
+                rd: tmp,
+                rn,
+                shift_imm: SImm20::maybe_from_i64(63).unwrap(),
+                shift_reg: None,
+            });
+            ctx.emit(Inst::AluRRR {
+                alu_op: ALUOp::Xor64,
+                rd: tmp,
+                rn: tmp.to_reg(),
+                rm: rn,
+            });
+
+            ctx.emit(Inst::Flogr { rn: tmp.to_reg() });
+            ctx.emit(Inst::gen_move(rd, gpr(0), ty));
+
+            if ty_bits_size < 64 {
+                ctx.emit(Inst::AluRSImm16 {
+                    alu_op: ALUOp::Add32,
+                    rd,
+                    imm: -(64 - ty_bits_size as i16),
+                });
+            }
+        }
+
+        Opcode::Ctz => {
+            let rd = get_output_reg(ctx, outputs[0]).only_reg().unwrap();
+            let rn = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None);
+            let ty = ty.unwrap();
+            let ty_bits_size = ty_bits(ty);
+
+            let rn = if ty_bits_size < 64 {
+                let tmp = ctx.alloc_tmp(types::I64).only_reg().unwrap();
+                ctx.emit(Inst::gen_move(tmp, rn, ty));
+                ctx.emit(Inst::AluRUImm16Shifted {
+                    alu_op: ALUOp::Orr64,
+                    rd: tmp,
+                    imm: UImm16Shifted::maybe_from_u64(1u64 << ty_bits_size).unwrap(),
+                });
+                tmp.to_reg()
+            } else {
+                rn
+            };
+
+            // tmp = rn & -rn
+            let tmp = ctx.alloc_tmp(types::I64).only_reg().unwrap();
+            ctx.emit(Inst::UnaryRR {
+                op: UnaryOp::Neg64,
+                rd: tmp,
+                rn,
+            });
+            ctx.emit(Inst::AluRRR {
+                alu_op: ALUOp::And64,
+                rd: tmp,
+                rn: tmp.to_reg(),
+                rm: rn,
+            });
+
+            ctx.emit(Inst::Flogr { rn: tmp.to_reg() });
+            if ty_bits_size == 64 {
+                ctx.emit(Inst::CMov64SImm16 {
+                    rd: writable_gpr(0),
+                    cond: Cond::from_intcc(IntCC::Equal),
+                    imm: -1,
+                });
+            }
+
+            if ty_bits_size <= 32 {
+                lower_constant_u32(ctx, rd, 63);
+            } else {
+                lower_constant_u64(ctx, rd, 63);
+            }
+            let alu_op = choose_32_64(ty, ALUOp::Sub32,
ALUOp::Sub64);
+            ctx.emit(Inst::AluRRR {
+                alu_op,
+                rd,
+                rn: rd.to_reg(),
+                rm: gpr(0),
+            });
+        }
+
+        Opcode::Bitrev => unimplemented!(),
+
+        Opcode::Popcnt => {
+            let ty = ty.unwrap();
+            let rd = get_output_reg(ctx, outputs[0]).only_reg().unwrap();
+            if ty_bits(ty) <= 8 {
+                let rn = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None);
+                ctx.emit(Inst::UnaryRR {
+                    op: UnaryOp::PopcntByte,
+                    rd,
+                    rn,
+                });
+            } else {
+                let rn = put_input_in_reg(ctx, inputs[0], NarrowValueMode::ZeroExtend64);
+                ctx.emit(Inst::UnaryRR {
+                    op: UnaryOp::PopcntReg,
+                    rd,
+                    rn,
+                });
+            }
+        }
+
+        Opcode::Fadd | Opcode::Fsub | Opcode::Fmul | Opcode::Fdiv => {
+            let bits = ty_bits(ctx.output_ty(insn, 0));
+            let fpu_op = match (op, bits) {
+                (Opcode::Fadd, 32) => FPUOp2::Add32,
+                (Opcode::Fadd, 64) => FPUOp2::Add64,
+                (Opcode::Fsub, 32) => FPUOp2::Sub32,
+                (Opcode::Fsub, 64) => FPUOp2::Sub64,
+                (Opcode::Fmul, 32) => FPUOp2::Mul32,
+                (Opcode::Fmul, 64) => FPUOp2::Mul64,
+                (Opcode::Fdiv, 32) => FPUOp2::Div32,
+                (Opcode::Fdiv, 64) => FPUOp2::Div64,
+                _ => panic!("Unknown op/bits combination"),
+            };
+            let rn = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None);
+            let rm = put_input_in_reg(ctx, inputs[1], NarrowValueMode::None);
+            let rd = get_output_reg(ctx, outputs[0]).only_reg().unwrap();
+            // gen_move picks the correct (FPU) move for F32/F64 values.
+            ctx.emit(Inst::gen_move(rd, rn, ctx.output_ty(insn, 0)));
+            ctx.emit(Inst::FpuRRR { fpu_op, rd, rm });
+        }
+
+        Opcode::Fmin | Opcode::Fmax => {
+            let bits = ty_bits(ctx.output_ty(insn, 0));
+            let fpu_op = match (op, bits) {
+                (Opcode::Fmin, 32) => FPUOp2::Min32,
+                (Opcode::Fmin, 64) => FPUOp2::Min64,
+                (Opcode::Fmax, 32) => FPUOp2::Max32,
+                (Opcode::Fmax, 64) => FPUOp2::Max64,
+                _ => panic!("Unknown op/bits combination"),
+            };
+            let rn = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None);
+            let rm = put_input_in_reg(ctx, inputs[1], NarrowValueMode::None);
+            let rd = get_output_reg(ctx, outputs[0]).only_reg().unwrap();
+            ctx.emit(Inst::FpuVecRRR { fpu_op, rd, rn, rm });
+        }
+
+        Opcode::Sqrt | Opcode::Fneg | Opcode::Fabs | Opcode::Fpromote | Opcode::Fdemote => {
+            let bits = ty_bits(ctx.output_ty(insn, 0));
+            let fpu_op = match (op, bits) {
+                (Opcode::Sqrt, 32) => FPUOp1::Sqrt32,
+                (Opcode::Sqrt, 64) => FPUOp1::Sqrt64,
+                (Opcode::Fneg, 32) => FPUOp1::Neg32,
+                (Opcode::Fneg, 64) => FPUOp1::Neg64,
+                (Opcode::Fabs, 32) => FPUOp1::Abs32,
+                (Opcode::Fabs, 64) => FPUOp1::Abs64,
+                (Opcode::Fpromote, 32) => panic!("Cannot promote to 32 bits"),
+                (Opcode::Fpromote, 64) => FPUOp1::Cvt32To64,
+                (Opcode::Fdemote, 32) => FPUOp1::Cvt64To32,
+                (Opcode::Fdemote, 64) => panic!("Cannot demote to 64 bits"),
+                _ => panic!("Unknown op/bits combination"),
+            };
+            let rn = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None);
+            let rd = get_output_reg(ctx, outputs[0]).only_reg().unwrap();
+            ctx.emit(Inst::FpuRR { fpu_op, rd, rn });
+        }
+
+        Opcode::Ceil | Opcode::Floor | Opcode::Trunc | Opcode::Nearest => {
+            let bits = ty_bits(ctx.output_ty(insn, 0));
+            let op = match (op, bits) {
+                (Opcode::Ceil, 32) => FpuRoundMode::Plus32,
+                (Opcode::Ceil, 64) => FpuRoundMode::Plus64,
+                (Opcode::Floor, 32) => FpuRoundMode::Minus32,
+                (Opcode::Floor, 64) => FpuRoundMode::Minus64,
+                (Opcode::Trunc, 32) => FpuRoundMode::Zero32,
+                (Opcode::Trunc, 64) => FpuRoundMode::Zero64,
+                (Opcode::Nearest, 32) => FpuRoundMode::Nearest32,
+                (Opcode::Nearest, 64) => FpuRoundMode::Nearest64,
+                _ => panic!("Unknown op/bits combination"),
+            };
+            let rn = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None);
+            let rd = get_output_reg(ctx, outputs[0]).only_reg().unwrap();
+            ctx.emit(Inst::FpuRound
{ op, rd, rn });
+        }
+
+        Opcode::Fma => {
+            let bits = ty_bits(ctx.output_ty(insn, 0));
+            let fpu_op = match bits {
+                32 => FPUOp3::MAdd32,
+                64 => FPUOp3::MAdd64,
+                _ => panic!("Unknown op size"),
+            };
+            let rn = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None);
+            let rm = put_input_in_reg(ctx, inputs[1], NarrowValueMode::None);
+            let ra = put_input_in_reg(ctx, inputs[2], NarrowValueMode::None);
+            let rd = get_output_reg(ctx, outputs[0]).only_reg().unwrap();
+            ctx.emit(Inst::gen_move(rd, ra, ctx.output_ty(insn, 0)));
+            ctx.emit(Inst::FpuRRRR { fpu_op, rd, rn, rm });
+        }
+
+        Opcode::Fcopysign => {
+            let ty = ctx.output_ty(insn, 0);
+            let bits = ty_bits(ty) as u8;
+            assert!(bits == 32 || bits == 64);
+            let rn = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None);
+            let rm = put_input_in_reg(ctx, inputs[1], NarrowValueMode::None);
+            let rd = get_output_reg(ctx, outputs[0]).only_reg().unwrap();
+
+            ctx.emit(Inst::FpuCopysign { rd, rn, rm });
+        }
+
+        Opcode::FcvtFromUint | Opcode::FcvtFromSint => {
+            let in_bits = ty_bits(ctx.input_ty(insn, 0));
+            let out_bits = ty_bits(ctx.output_ty(insn, 0));
+            let signed = op == Opcode::FcvtFromSint;
+            let op = match (signed, in_bits, out_bits) {
+                (false, 32, 32) => IntToFpuOp::U32ToF32,
+                (true, 32, 32) => IntToFpuOp::I32ToF32,
+                (false, 32, 64) => IntToFpuOp::U32ToF64,
+                (true, 32, 64) => IntToFpuOp::I32ToF64,
+                (false, 64, 32) => IntToFpuOp::U64ToF32,
+                (true, 64, 32) => IntToFpuOp::I64ToF32,
+                (false, 64, 64) => IntToFpuOp::U64ToF64,
+                (true, 64, 64) => IntToFpuOp::I64ToF64,
+                _ => panic!("Unknown input/output-bits combination"),
+            };
+            let narrow_mode = match (signed, in_bits) {
+                (false, 32) => NarrowValueMode::ZeroExtend32,
+                (true, 32) => NarrowValueMode::SignExtend32,
+                (false, 64) => NarrowValueMode::ZeroExtend64,
+                (true, 64) => NarrowValueMode::SignExtend64,
+                _ => panic!("Unknown input size"),
+            };
+            let rn = put_input_in_reg(ctx, inputs[0], narrow_mode);
+            let rd = get_output_reg(ctx, outputs[0]).only_reg().unwrap();
+            ctx.emit(Inst::IntToFpu { op, rd, rn });
+        }
+
+        Opcode::FcvtToUint | Opcode::FcvtToSint => {
+            let in_bits = ty_bits(ctx.input_ty(insn, 0));
+            let out_bits = ty_bits(ctx.output_ty(insn, 0));
+            let signed = op == Opcode::FcvtToSint;
+            let op = match (signed, in_bits, out_bits) {
+                (false, 32, 32) => FpuToIntOp::F32ToU32,
+                (true, 32, 32) => FpuToIntOp::F32ToI32,
+                (false, 32, 64) => FpuToIntOp::F32ToU64,
+                (true, 32, 64) => FpuToIntOp::F32ToI64,
+                (false, 64, 32) => FpuToIntOp::F64ToU32,
+                (true, 64, 32) => FpuToIntOp::F64ToI32,
+                (false, 64, 64) => FpuToIntOp::F64ToU64,
+                (true, 64, 64) => FpuToIntOp::F64ToI64,
+                _ => panic!("Unknown input/output-bits combination"),
+            };
+
+            let rn = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None);
+            let rd = get_output_reg(ctx, outputs[0]).only_reg().unwrap();
+
+            // First, check whether the input is a NaN and trap if so.
+            if in_bits == 32 {
+                ctx.emit(Inst::FpuCmp32 { rn, rm: rn });
+            } else {
+                ctx.emit(Inst::FpuCmp64 { rn, rm: rn });
+            }
+            ctx.emit(Inst::TrapIf {
+                trap_code: TrapCode::BadConversionToInteger,
+                cond: Cond::from_floatcc(FloatCC::Unordered),
+            });
+
+            // Perform the conversion. If this sets CC 3, we have a
+            // "special case". Since we already excluded the case where
+            // the input was a NaN, the only other option is that the
+            // conversion overflowed the target type.
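+            // (A floating-point comparison on z sets CC 3 for "unordered",
+            // and the conversion likewise sets CC 3 in its special case, so
+            // the same Unordered condition can be reused for both traps.)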
+            ctx.emit(Inst::FpuToInt { op, rd, rn });
+            ctx.emit(Inst::TrapIf {
+                trap_code: TrapCode::IntegerOverflow,
+                cond: Cond::from_floatcc(FloatCC::Unordered),
+            });
+        }
+
+        Opcode::FcvtToUintSat | Opcode::FcvtToSintSat => {
+            let in_bits = ty_bits(ctx.input_ty(insn, 0));
+            let out_bits = ty_bits(ctx.output_ty(insn, 0));
+            let signed = op == Opcode::FcvtToSintSat;
+            let op = match (signed, in_bits, out_bits) {
+                (false, 32, 32) => FpuToIntOp::F32ToU32,
+                (true, 32, 32) => FpuToIntOp::F32ToI32,
+                (false, 32, 64) => FpuToIntOp::F32ToU64,
+                (true, 32, 64) => FpuToIntOp::F32ToI64,
+                (false, 64, 32) => FpuToIntOp::F64ToU32,
+                (true, 64, 32) => FpuToIntOp::F64ToI32,
+                (false, 64, 64) => FpuToIntOp::F64ToU64,
+                (true, 64, 64) => FpuToIntOp::F64ToI64,
+                _ => panic!("Unknown input/output-bits combination"),
+            };
+
+            let rn = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None);
+            let rd = get_output_reg(ctx, outputs[0]).only_reg().unwrap();
+
+            // Perform the conversion.
+            ctx.emit(Inst::FpuToInt { op, rd, rn });
+
+            // In most special cases, the Z instruction already yields the
+            // result expected by Cranelift semantics. The only exception
+            // is the case where the input was a NaN. We explicitly check
+            // for that and force the output to 0 in that case.
+            if in_bits == 32 {
+                ctx.emit(Inst::FpuCmp32 { rn, rm: rn });
+            } else {
+                ctx.emit(Inst::FpuCmp64 { rn, rm: rn });
+            }
+            let cond = Cond::from_floatcc(FloatCC::Unordered);
+            if out_bits <= 32 {
+                ctx.emit(Inst::CMov32SImm16 { rd, cond, imm: 0 });
+            } else {
+                ctx.emit(Inst::CMov64SImm16 { rd, cond, imm: 0 });
+            }
+        }
+
+        Opcode::FcvtLowFromSint => unimplemented!("FcvtLowFromSint"),
+
+        Opcode::Bitcast => {
+            let rd = get_output_reg(ctx, outputs[0]).only_reg().unwrap();
+            let rn = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None);
+            let input_ty = ctx.input_ty(insn, 0);
+            let output_ty = ctx.output_ty(insn, 0);
+            match (input_ty, output_ty) {
+                (types::I64, types::F64) => {
+                    ctx.emit(Inst::MovToFpr { rd, rn });
+                }
+                (types::F64, types::I64) => {
+                    ctx.emit(Inst::MovFromFpr { rd, rn });
+                }
+                (types::I32, types::F32) => {
+                    let tmp = ctx.alloc_tmp(types::I64).only_reg().unwrap();
+                    ctx.emit(Inst::ShiftRR {
+                        shift_op: ShiftOp::LShL64,
+                        rd: tmp,
+                        rn,
+                        shift_imm: SImm20::maybe_from_i64(32).unwrap(),
+                        shift_reg: None,
+                    });
+                    ctx.emit(Inst::MovToFpr {
+                        rd,
+                        rn: tmp.to_reg(),
+                    });
+                }
+                (types::F32, types::I32) => {
+                    let tmp = ctx.alloc_tmp(types::I64).only_reg().unwrap();
+                    ctx.emit(Inst::MovFromFpr { rd: tmp, rn });
+                    ctx.emit(Inst::ShiftRR {
+                        shift_op: ShiftOp::LShR64,
+                        rd,
+                        rn: tmp.to_reg(),
+                        shift_imm: SImm20::maybe_from_i64(32).unwrap(),
+                        shift_reg: None,
+                    });
+                }
+                _ => unreachable!("invalid bitcast from {:?} to {:?}", input_ty, output_ty),
+            }
+        }
+
+        Opcode::Load
+        | Opcode::Uload8
+        | Opcode::Sload8
+        | Opcode::Uload16
+        | Opcode::Sload16
+        | Opcode::Uload32
+        | Opcode::Sload32
+        | Opcode::LoadComplex
+        | Opcode::Uload8Complex
+        | Opcode::Sload8Complex
+        | Opcode::Uload16Complex
+        | Opcode::Sload16Complex
+        | Opcode::Uload32Complex
+        | Opcode::Sload32Complex => {
+            let off = ctx.data(insn).load_store_offset().unwrap();
+            let flags = ctx.memflags(insn).unwrap();
+            let endianness = flags.endianness(Endianness::Big);
+            let elem_ty = ctx.output_ty(insn, 0);
+            let is_float = ty_is_float(elem_ty);
+            let to_bits = ty_bits(elem_ty);
+            let from_bits = match op {
+                Opcode::Load | Opcode::LoadComplex => to_bits,
+                Opcode::Sload8 | Opcode::Uload8 | Opcode::Sload8Complex | Opcode::Uload8Complex => {
+                    8
+                }
+                Opcode::Sload16
+                |
Opcode::Uload16 + | Opcode::Sload16Complex + | Opcode::Uload16Complex => 16, + Opcode::Sload32 + | Opcode::Uload32 + | Opcode::Sload32Complex + | Opcode::Uload32Complex => 32, + _ => unreachable!(), + }; + let ext_bits = if to_bits < 32 { 32 } else { to_bits }; + let sign_extend = match op { + Opcode::Sload8 + | Opcode::Sload8Complex + | Opcode::Sload16 + | Opcode::Sload16Complex + | Opcode::Sload32 + | Opcode::Sload32Complex => true, + _ => false, + }; + + let mem = lower_address(ctx, &inputs[..], off, flags); + let rd = get_output_reg(ctx, outputs[0]).only_reg().unwrap(); + + if endianness == Endianness::Big { + ctx.emit(match (ext_bits, from_bits, sign_extend, is_float) { + (32, 32, _, true) => Inst::FpuLoad32 { rd, mem }, + (64, 64, _, true) => Inst::FpuLoad64 { rd, mem }, + (32, 32, _, false) => Inst::Load32 { rd, mem }, + (64, 64, _, false) => Inst::Load64 { rd, mem }, + (32, 8, false, _) => Inst::Load32ZExt8 { rd, mem }, + (32, 8, true, _) => Inst::Load32SExt8 { rd, mem }, + (32, 16, false, _) => Inst::Load32ZExt16 { rd, mem }, + (32, 16, true, _) => Inst::Load32SExt16 { rd, mem }, + (64, 8, false, _) => Inst::Load64ZExt8 { rd, mem }, + (64, 8, true, _) => Inst::Load64SExt8 { rd, mem }, + (64, 16, false, _) => Inst::Load64ZExt16 { rd, mem }, + (64, 16, true, _) => Inst::Load64SExt16 { rd, mem }, + (64, 32, false, _) => Inst::Load64ZExt32 { rd, mem }, + (64, 32, true, _) => Inst::Load64SExt32 { rd, mem }, + _ => panic!("Unsupported size in load"), + }); + } else { + ctx.emit(match (ext_bits, from_bits, sign_extend, is_float) { + (32, 32, _, true) => Inst::FpuLoadRev32 { rd, mem }, + (64, 64, _, true) => Inst::FpuLoadRev64 { rd, mem }, + (_, 16, _, false) => Inst::LoadRev16 { rd, mem }, + (_, 32, _, false) => Inst::LoadRev32 { rd, mem }, + (_, 64, _, false) => Inst::LoadRev64 { rd, mem }, + (32, 8, false, _) => Inst::Load32ZExt8 { rd, mem }, + (32, 8, true, _) => Inst::Load32SExt8 { rd, mem }, + (64, 8, false, _) => Inst::Load64ZExt8 { rd, mem }, + (64, 8, true, _) => Inst::Load64SExt8 { rd, mem }, + _ => panic!("Unsupported size in load"), + }); + if to_bits > from_bits && from_bits > 8 { + assert!(is_float == false); + ctx.emit(Inst::Extend { + rd, + rn: rd.to_reg(), + signed: sign_extend, + from_bits: from_bits as u8, + to_bits: to_bits as u8, + }); + } + } + } + + Opcode::Store + | Opcode::Istore8 + | Opcode::Istore16 + | Opcode::Istore32 + | Opcode::StoreComplex + | Opcode::Istore8Complex + | Opcode::Istore16Complex + | Opcode::Istore32Complex => { + let off = ctx.data(insn).load_store_offset().unwrap(); + let flags = ctx.memflags(insn).unwrap(); + let endianness = flags.endianness(Endianness::Big); + let elem_ty = match op { + Opcode::Istore8 | Opcode::Istore8Complex => types::I8, + Opcode::Istore16 | Opcode::Istore16Complex => types::I16, + Opcode::Istore32 | Opcode::Istore32Complex => types::I32, + Opcode::Store | Opcode::StoreComplex => ctx.input_ty(insn, 0), + _ => unreachable!(), + }; + + let mem = lower_address(ctx, &inputs[1..], off, flags); + + if ty_is_float(elem_ty) { + let rd = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None); + ctx.emit(match (endianness, ty_bits(elem_ty)) { + (Endianness::Big, 32) => Inst::FpuStore32 { rd, mem }, + (Endianness::Big, 64) => Inst::FpuStore64 { rd, mem }, + (Endianness::Little, 32) => Inst::FpuStoreRev32 { rd, mem }, + (Endianness::Little, 64) => Inst::FpuStoreRev64 { rd, mem }, + _ => panic!("Unsupported size in store"), + }); + } else if ty_bits(elem_ty) <= 16 { + if let Some(imm) = input_matches_const(ctx, inputs[0]) { + 
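+                // The constant is stored directly via a store-immediate
+                // instruction; for a little-endian 16-bit store the
+                // immediate is byte-swapped at compile time below, rather
+                // than emitting a byte-reversing store.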
ctx.emit(match (endianness, ty_bits(elem_ty)) { + (_, 1) | (_, 8) => Inst::StoreImm8 { + imm: imm as u8, + mem, + }, + (Endianness::Big, 16) => Inst::StoreImm16 { + imm: imm as i16, + mem, + }, + (Endianness::Little, 16) => Inst::StoreImm16 { + imm: (imm as i16).swap_bytes(), + mem, + }, + _ => panic!("Unsupported size in store"), + }); + } else { + let rd = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None); + ctx.emit(match (endianness, ty_bits(elem_ty)) { + (_, 1) | (_, 8) => Inst::Store8 { rd, mem }, + (Endianness::Big, 16) => Inst::Store16 { rd, mem }, + (Endianness::Little, 16) => Inst::StoreRev16 { rd, mem }, + _ => panic!("Unsupported size in store"), + }); + } + } else if endianness == Endianness::Big { + if let Some(imm) = input_matches_simm16(ctx, inputs[0]) { + ctx.emit(match ty_bits(elem_ty) { + 32 => Inst::StoreImm32SExt16 { imm, mem }, + 64 => Inst::StoreImm64SExt16 { imm, mem }, + _ => panic!("Unsupported size in store"), + }); + } else { + let rd = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None); + ctx.emit(match ty_bits(elem_ty) { + 32 => Inst::Store32 { rd, mem }, + 64 => Inst::Store64 { rd, mem }, + _ => panic!("Unsupported size in store"), + }); + } + } else { + let rd = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None); + ctx.emit(match ty_bits(elem_ty) { + 32 => Inst::StoreRev32 { rd, mem }, + 64 => Inst::StoreRev64 { rd, mem }, + _ => panic!("Unsupported size in store"), + }); + } + } + + Opcode::StackLoad | Opcode::StackStore => { + panic!("Direct stack memory access not supported; should not be used by Wasm"); + } + + Opcode::StackAddr => { + let (stack_slot, offset) = match *ctx.data(insn) { + InstructionData::StackLoad { + opcode: Opcode::StackAddr, + stack_slot, + offset, + } => (stack_slot, offset), + _ => unreachable!(), + }; + let rd = get_output_reg(ctx, outputs[0]).only_reg().unwrap(); + let offset: i32 = offset.into(); + let inst = ctx + .abi() + .stackslot_addr(stack_slot, u32::try_from(offset).unwrap(), rd); + ctx.emit(inst); + } + + Opcode::ConstAddr => unimplemented!(), + + Opcode::FuncAddr => { + let rd = get_output_reg(ctx, outputs[0]).only_reg().unwrap(); + let (extname, dist) = ctx.call_target(insn).unwrap(); + let extname = extname.clone(); + if dist == RelocDistance::Near { + ctx.emit(Inst::LoadAddr { + rd, + mem: MemArg::Symbol { + name: Box::new(extname), + offset: 0, + flags: MemFlags::trusted(), + }, + }); + } else { + ctx.emit(Inst::LoadExtNameFar { + rd, + name: Box::new(extname), + offset: 0, + }); + } + } + + Opcode::SymbolValue => { + let rd = get_output_reg(ctx, outputs[0]).only_reg().unwrap(); + let (extname, dist, offset) = ctx.symbol_value(insn).unwrap(); + let extname = extname.clone(); + if dist == RelocDistance::Near && (offset & 1) == 0 && i32::try_from(offset).is_ok() { + ctx.emit(Inst::LoadAddr { + rd, + mem: MemArg::Symbol { + name: Box::new(extname), + offset: i32::try_from(offset).unwrap(), + flags: MemFlags::trusted(), + }, + }); + } else { + ctx.emit(Inst::LoadExtNameFar { + rd, + name: Box::new(extname), + offset, + }); + } + } + + Opcode::HeapAddr => { + panic!("heap_addr should have been removed by legalization!"); + } + + Opcode::TableAddr => { + panic!("table_addr should have been removed by legalization!"); + } + + Opcode::GlobalValue => { + panic!("global_value should have been removed by legalization!"); + } + + Opcode::TlsValue => { + panic!("Thread-local storage support not implemented!"); + } + + Opcode::GetPinnedReg | Opcode::SetPinnedReg => { + panic!("Pinned register support not 
implemented!"); + } + + Opcode::Icmp => { + let condcode = ctx.data(insn).cond_code().unwrap(); + let cond = Cond::from_intcc(condcode); + let is_signed = condcode_is_signed(condcode); + lower_icmp_to_flags(ctx, insn, is_signed, true); + + let rd = get_output_reg(ctx, outputs[0]).only_reg().unwrap(); + let ty = ctx.output_ty(insn, 0); + lower_flags_to_bool_result(ctx, cond, rd, ty); + } + + Opcode::Fcmp => { + let condcode = ctx.data(insn).fp_cond_code().unwrap(); + let cond = Cond::from_floatcc(condcode); + lower_fcmp_to_flags(ctx, insn); + + let rd = get_output_reg(ctx, outputs[0]).only_reg().unwrap(); + let ty = ctx.output_ty(insn, 0); + lower_flags_to_bool_result(ctx, cond, rd, ty); + } + + Opcode::IsNull | Opcode::IsInvalid => { + // Null references are represented by the constant value 0; invalid + // references are represented by the constant value -1. + let cond = Cond::from_intcc(IntCC::Equal); + let imm = match op { + Opcode::IsNull => 0, + Opcode::IsInvalid => -1, + _ => unreachable!(), + }; + let rn = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None); + ctx.emit(Inst::CmpRSImm16 { + op: CmpOp::CmpS64, + rn, + imm, + }); + + let rd = get_output_reg(ctx, outputs[0]).only_reg().unwrap(); + let ty = ctx.output_ty(insn, 0); + lower_flags_to_bool_result(ctx, cond, rd, ty); + } + + Opcode::Select => { + let ty = ctx.output_ty(insn, 0); + let rd = get_output_reg(ctx, outputs[0]).only_reg().unwrap(); + let rn = put_input_in_reg(ctx, inputs[1], NarrowValueMode::None); + let rm = put_input_in_reg(ctx, inputs[2], NarrowValueMode::None); + let cond = lower_boolean_to_flags(ctx, inputs[0]); + ctx.emit(Inst::gen_move(rd, rm, ty)); + if ty_is_float(ty) { + if ty_bits(ty) < 64 { + ctx.emit(Inst::FpuCMov32 { rd, cond, rm: rn }); + } else { + ctx.emit(Inst::FpuCMov64 { rd, cond, rm: rn }); + } + } else { + if ty_bits(ty) < 64 { + ctx.emit(Inst::CMov32 { rd, cond, rm: rn }); + } else { + ctx.emit(Inst::CMov64 { rd, cond, rm: rn }); + } + } + } + + Opcode::SelectifSpectreGuard => { + let ty = ctx.output_ty(insn, 0); + let rd = get_output_reg(ctx, outputs[0]).only_reg().unwrap(); + let rn = put_input_in_reg(ctx, inputs[1], NarrowValueMode::None); + let rm = put_input_in_reg(ctx, inputs[2], NarrowValueMode::None); + let condcode = ctx.data(insn).cond_code().unwrap(); + let cond = Cond::from_intcc(condcode); + let is_signed = condcode_is_signed(condcode); + + // Verification ensures that the input is always a single-def ifcmp. 
+ let cmp_insn = ctx + .get_input_as_source_or_const(inputs[0].insn, inputs[0].input) + .inst + .unwrap() + .0; + debug_assert_eq!(ctx.data(cmp_insn).opcode(), Opcode::Ifcmp); + lower_icmp_to_flags(ctx, cmp_insn, is_signed, true); + + ctx.emit(Inst::gen_move(rd, rm, ty)); + if ty_is_float(ty) { + if ty_bits(ty) < 64 { + ctx.emit(Inst::FpuCMov32 { rd, cond, rm: rn }); + } else { + ctx.emit(Inst::FpuCMov64 { rd, cond, rm: rn }); + } + } else { + if ty_bits(ty) < 64 { + ctx.emit(Inst::CMov32 { rd, cond, rm: rn }); + } else { + ctx.emit(Inst::CMov64 { rd, cond, rm: rn }); + } + } + } + + Opcode::Trap | Opcode::ResumableTrap => { + let trap_code = ctx.data(insn).trap_code().unwrap(); + ctx.emit_safepoint(Inst::Trap { trap_code }) + } + + Opcode::Trapz | Opcode::Trapnz | Opcode::ResumableTrapnz => { + let cond = lower_boolean_to_flags(ctx, inputs[0]); + let negated = op == Opcode::Trapz; + let cond = if negated { cond.invert() } else { cond }; + let trap_code = ctx.data(insn).trap_code().unwrap(); + ctx.emit_safepoint(Inst::TrapIf { trap_code, cond }); + } + + Opcode::Trapif => { + let condcode = ctx.data(insn).cond_code().unwrap(); + let cond = Cond::from_intcc(condcode); + let is_signed = condcode_is_signed(condcode); + + // Verification ensures that the input is always a single-def ifcmp. + let cmp_insn = ctx + .get_input_as_source_or_const(inputs[0].insn, inputs[0].input) + .inst + .unwrap() + .0; + debug_assert_eq!(ctx.data(cmp_insn).opcode(), Opcode::Ifcmp); + lower_icmp_to_flags(ctx, cmp_insn, is_signed, true); + + let trap_code = ctx.data(insn).trap_code().unwrap(); + ctx.emit_safepoint(Inst::TrapIf { trap_code, cond }); + } + + Opcode::Debugtrap => { + ctx.emit(Inst::Debugtrap); + } + + Opcode::Call | Opcode::CallIndirect => { + let caller_conv = ctx.abi().call_conv(); + let (mut abi, inputs) = match op { + Opcode::Call => { + let (extname, dist) = ctx.call_target(insn).unwrap(); + let extname = extname.clone(); + let sig = ctx.call_sig(insn).unwrap(); + assert!(inputs.len() == sig.params.len()); + assert!(outputs.len() == sig.returns.len()); + ( + S390xABICaller::from_func(sig, &extname, dist, caller_conv, flags)?, + &inputs[..], + ) + } + Opcode::CallIndirect => { + let ptr = put_input_in_reg(ctx, inputs[0], NarrowValueMode::ZeroExtend64); + let sig = ctx.call_sig(insn).unwrap(); + assert!(inputs.len() - 1 == sig.params.len()); + assert!(outputs.len() == sig.returns.len()); + ( + S390xABICaller::from_ptr(sig, ptr, op, caller_conv, flags)?, + &inputs[1..], + ) + } + _ => unreachable!(), + }; + + assert!(inputs.len() == abi.num_args()); + for (i, input) in inputs.iter().enumerate() { + let arg_reg = put_input_in_reg(ctx, *input, NarrowValueMode::None); + abi.emit_copy_regs_to_arg(ctx, i, ValueRegs::one(arg_reg)); + } + abi.emit_call(ctx); + for (i, output) in outputs.iter().enumerate() { + let retval_reg = get_output_reg(ctx, *output).only_reg().unwrap(); + abi.emit_copy_retval_to_regs(ctx, i, ValueRegs::one(retval_reg)); + } + abi.accumulate_outgoing_args_size(ctx); + } + + Opcode::FallthroughReturn | Opcode::Return => { + for (i, input) in inputs.iter().enumerate() { + let reg = put_input_in_reg(ctx, *input, NarrowValueMode::None); + let retval_reg = ctx.retval(i).only_reg().unwrap(); + let ty = ctx.input_ty(insn, i); + ctx.emit(Inst::gen_move(retval_reg, reg, ty)); + } + // N.B.: the Ret itself is generated by the ABI. 
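+            // (The ABI also restores callee-saved registers in the epilogue
+            // before the final br %r14.)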
+ } + + Opcode::AtomicRmw + | Opcode::AtomicCas + | Opcode::AtomicLoad + | Opcode::AtomicStore + | Opcode::Fence => { + // TODO + panic!("Atomic operations not implemented"); + } + + Opcode::RawBitcast + | Opcode::Splat + | Opcode::Swizzle + | Opcode::Insertlane + | Opcode::Extractlane + | Opcode::Imin + | Opcode::Umin + | Opcode::Imax + | Opcode::Umax + | Opcode::AvgRound + | Opcode::FminPseudo + | Opcode::FmaxPseudo + | Opcode::Uload8x8 + | Opcode::Uload8x8Complex + | Opcode::Sload8x8 + | Opcode::Sload8x8Complex + | Opcode::Uload16x4 + | Opcode::Uload16x4Complex + | Opcode::Sload16x4 + | Opcode::Sload16x4Complex + | Opcode::Uload32x2 + | Opcode::Uload32x2Complex + | Opcode::Sload32x2 + | Opcode::Sload32x2Complex + | Opcode::Vconst + | Opcode::Shuffle + | Opcode::Vsplit + | Opcode::Vconcat + | Opcode::Vselect + | Opcode::VanyTrue + | Opcode::VallTrue + | Opcode::VhighBits + | Opcode::ScalarToVector + | Opcode::Snarrow + | Opcode::Unarrow + | Opcode::SwidenLow + | Opcode::SwidenHigh + | Opcode::UwidenLow + | Opcode::UwidenHigh + | Opcode::WideningPairwiseDotProductS => { + // TODO + panic!("Vector ops not implemented."); + } + + Opcode::Isplit | Opcode::Iconcat => panic!("Wide integer ops not implemented."), + + Opcode::Spill + | Opcode::Fill + | Opcode::FillNop + | Opcode::Regmove + | Opcode::CopySpecial + | Opcode::CopyToSsa + | Opcode::CopyNop + | Opcode::AdjustSpDown + | Opcode::AdjustSpUpImm + | Opcode::AdjustSpDownImm + | Opcode::DummySargT + | Opcode::IfcmpSp + | Opcode::Regspill + | Opcode::Regfill => { + panic!("Unused opcode should not be encountered."); + } + + Opcode::Ifcmp + | Opcode::Ffcmp + | Opcode::Trapff + | Opcode::Trueif + | Opcode::Trueff + | Opcode::Selectif => { + panic!("Flags opcode should not be encountered."); + } + + Opcode::Jump + | Opcode::Fallthrough + | Opcode::Brz + | Opcode::Brnz + | Opcode::BrIcmp + | Opcode::Brif + | Opcode::Brff + | Opcode::IndirectJumpTableBr + | Opcode::BrTable => { + panic!("Branch opcode reached non-branch lowering logic!"); + } + + Opcode::JumpTableEntry | Opcode::JumpTableBase => { + panic!("Should not appear: we handle BrTable directly"); + } + + Opcode::Safepoint => { + panic!("safepoint instructions not used by new backend's safepoints!"); + } + + Opcode::IaddImm + | Opcode::ImulImm + | Opcode::UdivImm + | Opcode::SdivImm + | Opcode::UremImm + | Opcode::SremImm + | Opcode::IrsubImm + | Opcode::IaddCin + | Opcode::IaddIfcin + | Opcode::IaddCout + | Opcode::IaddIfcout + | Opcode::IaddCarry + | Opcode::IaddIfcarry + | Opcode::IsubBin + | Opcode::IsubIfbin + | Opcode::IsubBout + | Opcode::IsubIfbout + | Opcode::IsubBorrow + | Opcode::IsubIfborrow + | Opcode::BandImm + | Opcode::BorImm + | Opcode::BxorImm + | Opcode::RotlImm + | Opcode::RotrImm + | Opcode::IshlImm + | Opcode::UshrImm + | Opcode::SshrImm + | Opcode::IcmpImm + | Opcode::IfcmpImm => { + panic!("ALU+imm and ALU+carry ops should not appear here!"); + } + + #[cfg(feature = "x86")] + Opcode::X86Udivmodx + | Opcode::X86Sdivmodx + | Opcode::X86Umulx + | Opcode::X86Smulx + | Opcode::X86Cvtt2si + | Opcode::X86Fmin + | Opcode::X86Fmax + | Opcode::X86Push + | Opcode::X86Pop + | Opcode::X86Bsr + | Opcode::X86Bsf + | Opcode::X86Pblendw + | Opcode::X86Pshufd + | Opcode::X86Pshufb + | Opcode::X86Pextr + | Opcode::X86Pinsr + | Opcode::X86Insertps + | Opcode::X86Movsd + | Opcode::X86Movlhps + | Opcode::X86Psll + | Opcode::X86Psrl + | Opcode::X86Psra + | Opcode::X86Ptest + | Opcode::X86Pmaxs + | Opcode::X86Pmaxu + | Opcode::X86Pmins + | Opcode::X86Pminu + | Opcode::X86Pmullq + | 
Opcode::X86Pmuludq
+        | Opcode::X86Punpckh
+        | Opcode::X86Punpckl
+        | Opcode::X86Vcvtudq2ps
+        | Opcode::X86Palignr
+        | Opcode::X86ElfTlsGetAddr
+        | Opcode::X86MachoTlsGetAddr => {
+            panic!("x86-specific opcode in supposedly arch-neutral IR!");
+        }
+    }
+
+    Ok(())
+}
+
+//============================================================================
+// Lowering: main entry point for lowering a branch group
+
+fn lower_branch<C: LowerCtx<I = Inst>>(
+    ctx: &mut C,
+    branches: &[IRInst],
+    targets: &[MachLabel],
+) -> CodegenResult<()> {
+    // A block should end with at most two branches. The first may be a
+    // conditional branch; a conditional branch can be followed only by an
+    // unconditional branch or fallthrough. Otherwise, if only one branch,
+    // it may be an unconditional branch, a fallthrough, a return, or a
+    // trap. These conditions are verified by `is_ebb_basic()` during the
+    // verifier pass.
+    assert!(branches.len() <= 2);
+
+    if branches.len() == 2 {
+        // Must be a conditional branch followed by an unconditional branch.
+        let op0 = ctx.data(branches[0]).opcode();
+        let op1 = ctx.data(branches[1]).opcode();
+
+        assert!(op1 == Opcode::Jump || op1 == Opcode::Fallthrough);
+        let taken = BranchTarget::Label(targets[0]);
+        let not_taken = BranchTarget::Label(targets[1]);
+
+        match op0 {
+            Opcode::Brz | Opcode::Brnz => {
+                let flag_input = InsnInput {
+                    insn: branches[0],
+                    input: 0,
+                };
+                let cond = lower_boolean_to_flags(ctx, flag_input);
+                let negated = op0 == Opcode::Brz;
+                let cond = if negated { cond.invert() } else { cond };
+                ctx.emit(Inst::CondBr {
+                    taken,
+                    not_taken,
+                    cond,
+                });
+            }
+
+            Opcode::Brif => {
+                let condcode = ctx.data(branches[0]).cond_code().unwrap();
+                let cond = Cond::from_intcc(condcode);
+                let is_signed = condcode_is_signed(condcode);
+
+                // Verification ensures that the input is always a single-def ifcmp.
+                let cmp_insn = ctx
+                    .get_input_as_source_or_const(branches[0], 0)
+                    .inst
+                    .unwrap()
+                    .0;
+                debug_assert_eq!(ctx.data(cmp_insn).opcode(), Opcode::Ifcmp);
+                lower_icmp_to_flags(ctx, cmp_insn, is_signed, true);
+
+                ctx.emit(Inst::CondBr {
+                    taken,
+                    not_taken,
+                    cond,
+                });
+            }
+
+            Opcode::Brff => unreachable!(),
+
+            _ => unimplemented!(),
+        }
+    } else {
+        // Must be an unconditional branch or an indirect branch.
+        let op = ctx.data(branches[0]).opcode();
+        match op {
+            Opcode::Jump | Opcode::Fallthrough => {
+                assert!(branches.len() == 1);
+                // In the Fallthrough case, the machine-independent driver
+                // fills in `targets[0]` with our fallthrough block, so this
+                // is valid for both Jump and Fallthrough.
+                ctx.emit(Inst::Jump {
+                    dest: BranchTarget::Label(targets[0]),
+                });
+            }
+
+            Opcode::BrTable => {
+                let jt_size = targets.len() - 1;
+                assert!(jt_size <= std::u32::MAX as usize);
+
+                // Load up jump table element index.
+                let ridx = put_input_in_reg(
+                    ctx,
+                    InsnInput {
+                        insn: branches[0],
+                        input: 0,
+                    },
+                    NarrowValueMode::ZeroExtend64,
+                );
+
+                // Temp registers needed by the compound instruction.
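+                // rtmp1 ends up holding the PC-relative address of the
+                // inline jump table (and finally the branch target);
+                // rtmp2 holds the scaled index and the loaded table entry.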
+                let rtmp1 = ctx.alloc_tmp(types::I64).only_reg().unwrap();
+                let rtmp2 = ctx.alloc_tmp(types::I64).only_reg().unwrap();
+
+                // Emit the compound instruction that does:
+                //
+                // clgfi %rIdx, <jt-size>
+                // jghe <default-target>
+                // sllg %rTmp2, %rIdx, 2
+                // larl %rTmp1, <jt-base>
+                // lgf %rTmp2, 0(%rTmp2, %rTmp1)
+                // agrk %rTmp1, %rTmp1, %rTmp2
+                // br %rTmp1
+                // [jt entries]
+                //
+                // This must be *one* instruction in the vcode because
+                // we cannot allow regalloc to insert any spills/fills
+                // in the middle of the sequence; otherwise, the larl's
+                // PC-rel offset to the jumptable would be incorrect.
+                // (The alternative is to introduce a relocation pass
+                // for inlined jumptables, which is much worse, IMHO.)
+
+                let default_target = BranchTarget::Label(targets[0]);
+                let jt_targets: Vec<BranchTarget> = targets
+                    .iter()
+                    .skip(1)
+                    .map(|bix| BranchTarget::Label(*bix))
+                    .collect();
+                let targets_for_term: Vec<MachLabel> = targets.to_vec();
+                ctx.emit(Inst::JTSequence {
+                    ridx,
+                    rtmp1,
+                    rtmp2,
+                    info: Box::new(JTSequenceInfo {
+                        default_target,
+                        targets: jt_targets,
+                        targets_for_term,
+                    }),
+                });
+            }
+
+            _ => panic!("Unknown branch type!"),
+        }
+    }
+
+    Ok(())
+}
+
+//=============================================================================
+// Lowering-backend trait implementation.
+
+impl LowerBackend for S390xBackend {
+    type MInst = Inst;
+
+    fn lower<C: LowerCtx<I = Inst>>(&self, ctx: &mut C, ir_inst: IRInst) -> CodegenResult<()> {
+        lower_insn_to_regs(ctx, ir_inst, &self.flags)
+    }
+
+    fn lower_branch_group<C: LowerCtx<I = Inst>>(
+        &self,
+        ctx: &mut C,
+        branches: &[IRInst],
+        targets: &[MachLabel],
+    ) -> CodegenResult<()> {
+        lower_branch(ctx, branches, targets)
+    }
+}
diff --git a/cranelift/codegen/src/isa/s390x/mod.rs b/cranelift/codegen/src/isa/s390x/mod.rs
new file mode 100644
index 0000000000..3a78b54c95
--- /dev/null
+++ b/cranelift/codegen/src/isa/s390x/mod.rs
@@ -0,0 +1,296 @@
+//! IBM Z 64-bit Instruction Set Architecture.
+
+use crate::ir::condcodes::IntCC;
+use crate::ir::Function;
+use crate::isa::s390x::settings as s390x_settings;
+use crate::isa::unwind::systemv::RegisterMappingError;
+use crate::isa::Builder as IsaBuilder;
+use crate::machinst::{compile, MachBackend, MachCompileResult, TargetIsaAdapter, VCode};
+use crate::result::CodegenResult;
+use crate::settings as shared_settings;
+
+use alloc::{boxed::Box, vec::Vec};
+use core::hash::{Hash, Hasher};
+
+use regalloc::{PrettyPrint, RealRegUniverse, Reg};
+use target_lexicon::{Architecture, Triple};
+
+// New backend:
+mod abi;
+pub(crate) mod inst;
+mod lower;
+mod settings;
+
+use inst::create_reg_universe;
+
+use self::inst::EmitInfo;
+
+/// An IBM Z backend.
+pub struct S390xBackend {
+    triple: Triple,
+    flags: shared_settings::Flags,
+    isa_flags: s390x_settings::Flags,
+    reg_universe: RealRegUniverse,
+}
+
+impl S390xBackend {
+    /// Create a new IBM Z backend with the given (shared) flags.
+    pub fn new_with_flags(
+        triple: Triple,
+        flags: shared_settings::Flags,
+        isa_flags: s390x_settings::Flags,
+    ) -> S390xBackend {
+        let reg_universe = create_reg_universe(&flags);
+        S390xBackend {
+            triple,
+            flags,
+            isa_flags,
+            reg_universe,
+        }
+    }
+
+    /// This performs lowering to VCode, register-allocates the code, computes block layout and
+    /// finalizes branches. The result is ready for binary emission.
+    fn compile_vcode(
+        &self,
+        func: &Function,
+        flags: shared_settings::Flags,
+    ) -> CodegenResult<VCode<inst::Inst>> {
+        let emit_info = EmitInfo::new(flags.clone());
+        let abi = Box::new(abi::S390xABICallee::new(func, flags)?);
+        compile::compile::<S390xBackend>(func, self, abi, emit_info)
+    }
+}
+
+impl MachBackend for S390xBackend {
+    fn compile_function(
+        &self,
+        func: &Function,
+        want_disasm: bool,
+    ) -> CodegenResult<MachCompileResult> {
+        let flags = self.flags();
+        let vcode = self.compile_vcode(func, flags.clone())?;
+        let buffer = vcode.emit();
+        let frame_size = vcode.frame_size();
+        let value_labels_ranges = vcode.value_labels_ranges();
+        let stackslot_offsets = vcode.stackslot_offsets().clone();
+
+        let disasm = if want_disasm {
+            Some(vcode.show_rru(Some(&create_reg_universe(flags))))
+        } else {
+            None
+        };
+
+        let buffer = buffer.finish();
+
+        Ok(MachCompileResult {
+            buffer,
+            frame_size,
+            disasm,
+            value_labels_ranges,
+            stackslot_offsets,
+        })
+    }
+
+    fn name(&self) -> &'static str {
+        "s390x"
+    }
+
+    fn triple(&self) -> Triple {
+        self.triple.clone()
+    }
+
+    fn flags(&self) -> &shared_settings::Flags {
+        &self.flags
+    }
+
+    fn isa_flags(&self) -> Vec<shared_settings::Value> {
+        self.isa_flags.iter().collect()
+    }
+
+    fn hash_all_flags(&self, mut hasher: &mut dyn Hasher) {
+        self.flags.hash(&mut hasher);
+        self.isa_flags.hash(&mut hasher);
+    }
+
+    fn reg_universe(&self) -> &RealRegUniverse {
+        &self.reg_universe
+    }
+
+    fn unsigned_add_overflow_condition(&self) -> IntCC {
+        unimplemented!()
+    }
+
+    fn unsigned_sub_overflow_condition(&self) -> IntCC {
+        unimplemented!()
+    }
+
+    #[cfg(feature = "unwind")]
+    fn emit_unwind_info(
+        &self,
+        result: &MachCompileResult,
+        kind: crate::machinst::UnwindInfoKind,
+    ) -> CodegenResult<Option<crate::isa::unwind::UnwindInfo>> {
+        use crate::isa::unwind::UnwindInfo;
+        use crate::machinst::UnwindInfoKind;
+        Ok(match kind {
+            UnwindInfoKind::SystemV => {
+                let mapper = self::inst::unwind::systemv::RegisterMapper;
+                Some(UnwindInfo::SystemV(
+                    crate::isa::unwind::systemv::create_unwind_info_from_insts(
+                        &result.buffer.unwind_info[..],
+                        result.buffer.data.len(),
+                        &mapper,
+                    )?,
+                ))
+            }
+            _ => None,
+        })
+    }
+
+    #[cfg(feature = "unwind")]
+    fn create_systemv_cie(&self) -> Option<gimli::write::CommonInformationEntry> {
+        Some(inst::unwind::systemv::create_cie())
+    }
+
+    #[cfg(feature = "unwind")]
+    fn map_reg_to_dwarf(&self, reg: Reg) -> Result<u16, RegisterMappingError> {
+        inst::unwind::systemv::map_reg(reg).map(|reg| reg.0)
+    }
+}
+
+/// Create a new `isa::Builder`.
+pub fn isa_builder(triple: Triple) -> IsaBuilder { + assert!(triple.architecture == Architecture::S390x); + IsaBuilder { + triple, + setup: s390x_settings::builder(), + constructor: |triple, shared_flags, builder| { + let isa_flags = s390x_settings::Flags::new(&shared_flags, builder); + let backend = S390xBackend::new_with_flags(triple, shared_flags, isa_flags); + Box::new(TargetIsaAdapter::new(backend)) + }, + } +} + +#[cfg(test)] +mod test { + use super::*; + use crate::cursor::{Cursor, FuncCursor}; + use crate::ir::types::*; + use crate::ir::{AbiParam, ExternalName, Function, InstBuilder, Signature}; + use crate::isa::CallConv; + use crate::settings; + use crate::settings::Configurable; + use core::str::FromStr; + use target_lexicon::Triple; + + #[test] + fn test_compile_function() { + let name = ExternalName::testcase("test0"); + let mut sig = Signature::new(CallConv::SystemV); + sig.params.push(AbiParam::new(I32)); + sig.returns.push(AbiParam::new(I32)); + let mut func = Function::with_name_signature(name, sig); + + let bb0 = func.dfg.make_block(); + let arg0 = func.dfg.append_block_param(bb0, I32); + + let mut pos = FuncCursor::new(&mut func); + pos.insert_block(bb0); + let v0 = pos.ins().iconst(I32, 0x1234); + let v1 = pos.ins().iadd(arg0, v0); + pos.ins().return_(&[v1]); + + let mut shared_flags_builder = settings::builder(); + shared_flags_builder.set("opt_level", "none").unwrap(); + let shared_flags = settings::Flags::new(shared_flags_builder); + let isa_flags = s390x_settings::Flags::new(&shared_flags, s390x_settings::builder()); + let backend = S390xBackend::new_with_flags( + Triple::from_str("s390x").unwrap(), + shared_flags, + isa_flags, + ); + let result = backend + .compile_function(&mut func, /* want_disasm = */ false) + .unwrap(); + let code = &result.buffer.data[..]; + + // ahi %r2, 0x1234 + // br %r14 + let golden = vec![0xa7, 0x2a, 0x12, 0x34, 0x07, 0xfe]; + + assert_eq!(code, &golden[..]); + } + + #[test] + fn test_branch_lowering() { + let name = ExternalName::testcase("test0"); + let mut sig = Signature::new(CallConv::SystemV); + sig.params.push(AbiParam::new(I32)); + sig.returns.push(AbiParam::new(I32)); + let mut func = Function::with_name_signature(name, sig); + + let bb0 = func.dfg.make_block(); + let arg0 = func.dfg.append_block_param(bb0, I32); + let bb1 = func.dfg.make_block(); + let bb2 = func.dfg.make_block(); + let bb3 = func.dfg.make_block(); + + let mut pos = FuncCursor::new(&mut func); + pos.insert_block(bb0); + let v0 = pos.ins().iconst(I32, 0x1234); + let v1 = pos.ins().iadd(arg0, v0); + pos.ins().brnz(v1, bb1, &[]); + pos.ins().jump(bb2, &[]); + pos.insert_block(bb1); + pos.ins().brnz(v1, bb2, &[]); + pos.ins().jump(bb3, &[]); + pos.insert_block(bb2); + let v2 = pos.ins().iadd(v1, v0); + pos.ins().brnz(v2, bb2, &[]); + pos.ins().jump(bb1, &[]); + pos.insert_block(bb3); + let v3 = pos.ins().isub(v1, v0); + pos.ins().return_(&[v3]); + + let mut shared_flags_builder = settings::builder(); + shared_flags_builder.set("opt_level", "none").unwrap(); + let shared_flags = settings::Flags::new(shared_flags_builder); + let isa_flags = s390x_settings::Flags::new(&shared_flags, s390x_settings::builder()); + let backend = S390xBackend::new_with_flags( + Triple::from_str("s390x").unwrap(), + shared_flags, + isa_flags, + ); + let result = backend + .compile_function(&mut func, /* want_disasm = */ false) + .unwrap(); + let code = &result.buffer.data[..]; + + // FIXME: the branching logic should be optimized more + + // ahi %r2, 4660 + // chi %r2, 0 + // jglh label1 
; jg label2 + // jg label6 + // jg label3 + // ahik %r3, %r2, 4660 + // chi %r3, 0 + // jglh label4 ; jg label5 + // jg label3 + // jg label6 + // chi %r2, 0 + // jglh label7 ; jg label8 + // jg label3 + // ahi %r2, -4660 + // br %r14 + let golden = vec![ + 167, 42, 18, 52, 167, 46, 0, 0, 192, 100, 0, 0, 0, 11, 236, 50, 18, 52, 0, 216, 167, + 62, 0, 0, 192, 100, 255, 255, 255, 251, 167, 46, 0, 0, 192, 100, 255, 255, 255, 246, + 167, 42, 237, 204, 7, 254, + ]; + + assert_eq!(code, &golden[..]); + } +} diff --git a/cranelift/codegen/src/isa/s390x/settings.rs b/cranelift/codegen/src/isa/s390x/settings.rs new file mode 100644 index 0000000000..69859cee4f --- /dev/null +++ b/cranelift/codegen/src/isa/s390x/settings.rs @@ -0,0 +1,9 @@ +//! S390X Settings. + +use crate::settings::{self, detail, Builder, Value}; +use core::fmt; + +// Include code generated by `cranelift-codegen/meta/src/gen_settings.rs:`. This file contains a +// public `Flags` struct with an impl for all of the settings defined in +// `cranelift-codegen/meta/src/isa/s390x/settings.rs`. +include!(concat!(env!("OUT_DIR"), "/settings-s390x.rs")); diff --git a/cranelift/filetests/filetests/isa/s390x/arithmetic.clif b/cranelift/filetests/filetests/isa/s390x/arithmetic.clif new file mode 100644 index 0000000000..479268ec0c --- /dev/null +++ b/cranelift/filetests/filetests/isa/s390x/arithmetic.clif @@ -0,0 +1,1136 @@ +test compile +target s390x + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; IADD +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +function %iadd_i64(i64, i64) -> i64 { +block0(v0: i64, v1: i64): + v2 = iadd.i64 v0, v1 + return v2 +} + +; check: agr %r2, %r3 +; nextln: br %r14 + +function %iadd_i64_ext32(i64, i32) -> i64 { +block0(v0: i64, v1: i32): + v2 = sextend.i64 v1 + v3 = iadd.i64 v0, v2 + return v3 +} + +; check: agfr %r2, %r3 +; nextln: br %r14 + +function %iadd_i64_imm16(i64) -> i64 { +block0(v0: i64): + v1 = iconst.i64 1 + v2 = iadd.i64 v0, v1 + return v2 +} + +; check: aghi %r2, 1 +; nextln: br %r14 + +function %iadd_i64_imm32(i64) -> i64 { +block0(v0: i64): + v1 = iconst.i64 32768 + v2 = iadd.i64 v0, v1 + return v2 +} + +; check: agfi %r2, 32768 +; nextln: br %r14 + +function %iadd_i64_mem(i64, i64) -> i64 { +block0(v0: i64, v1: i64): + v2 = load.i64 v1 + v3 = iadd.i64 v0, v2 + return v3 +} + +; check: ag %r2, 0(%r3) +; nextln: br %r14 + +function %iadd_i64_mem_ext16(i64, i64) -> i64 { +block0(v0: i64, v1: i64): + v2 = sload16.i64 v1 + v3 = iadd.i64 v0, v2 + return v3 +} + +; check: agh %r2, 0(%r3) +; nextln: br %r14 + +function %iadd_i64_mem_ext32(i64, i64) -> i64 { +block0(v0: i64, v1: i64): + v2 = sload32.i64 v1 + v3 = iadd.i64 v0, v2 + return v3 +} + +; check: agf %r2, 0(%r3) +; nextln: br %r14 + +function %iadd_i32(i32, i32) -> i32 { +block0(v0: i32, v1: i32): + v2 = iadd.i32 v0, v1 + return v2 +} + +; check: ar %r2, %r3 +; nextln: br %r14 + +function %iadd_i32_imm16(i32) -> i32 { +block0(v0: i32): + v1 = iconst.i32 1 + v2 = iadd.i32 v0, v1 + return v2 +} + +; check: ahi %r2, 1 +; nextln: br %r14 + +function %iadd_i32_imm(i32) -> i32 { +block0(v0: i32): + v1 = iconst.i32 32768 + v2 = iadd.i32 v0, v1 + return v2 +} + +; check: afi %r2, 32768 +; nextln: br %r14 + +function %iadd_i32_mem(i32, i64) -> i32 { +block0(v0: i32, v1: i64): + v2 = load.i32 v1 + v3 = iadd.i32 v0, v2 + return v3 +} + +; check: a %r2, 0(%r3) +; nextln: br %r14 + +function %iadd_i32_memoff(i32, i64) -> i32 { +block0(v0: i32, v1: i64): + v2 = load.i32 v1+4096 + v3 = iadd.i32 v0, v2 + return v3 +} + +; check: ay %r2, 4096(%r3) +; nextln: 
br %r14 + +function %iadd_i32_mem_ext16(i32, i64) -> i32 { +block0(v0: i32, v1: i64): + v2 = sload16.i32 v1 + v3 = iadd.i32 v0, v2 + return v3 +} + +; check: ah %r2, 0(%r3) +; nextln: br %r14 + +function %iadd_i32_memoff_ext16(i32, i64) -> i32 { +block0(v0: i32, v1: i64): + v2 = sload16.i32 v1+4096 + v3 = iadd.i32 v0, v2 + return v3 +} + +; check: ahy %r2, 4096(%r3) +; nextln: br %r14 + +function %iadd_i16(i16, i16) -> i16 { +block0(v0: i16, v1: i16): + v2 = iadd.i16 v0, v1 + return v2 +} + +; check: ar %r2, %r3 +; nextln: br %r14 + +function %iadd_i16_imm(i16) -> i16 { +block0(v0: i16): + v1 = iconst.i16 1 + v2 = iadd.i16 v0, v1 + return v2 +} + +; check: ahi %r2, 1 +; nextln: br %r14 + +function %iadd_i16_mem(i16, i64) -> i16 { +block0(v0: i16, v1: i64): + v2 = load.i16 v1 + v3 = iadd.i16 v0, v2 + return v3 +} + +; check: ah %r2, 0(%r3) +; nextln: br %r14 + +function %iadd_i8(i8, i8) -> i8 { +block0(v0: i8, v1: i8): + v2 = iadd.i8 v0, v1 + return v2 +} + +; check: ar %r2, %r3 +; nextln: br %r14 + +function %iadd_i8_imm(i8) -> i8 { +block0(v0: i8): + v1 = iconst.i8 1 + v2 = iadd.i8 v0, v1 + return v2 +} + +; check: ahi %r2, 1 +; nextln: br %r14 + +function %iadd_i8_mem(i8, i64) -> i8 { +block0(v0: i8, v1: i64): + v2 = load.i8 v1 + v3 = iadd.i8 v0, v2 + return v3 +} + +; check: llc %r3, 0(%r3) +; nextln: ar %r2, %r3 +; nextln: br %r14 + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; ISUB +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +function %isub_i64(i64, i64) -> i64 { +block0(v0: i64, v1: i64): + v2 = isub.i64 v0, v1 + return v2 +} + +; check: sgr %r2, %r3 +; nextln: br %r14 + +function %isub_i64_ext32(i64, i32) -> i64 { +block0(v0: i64, v1: i32): + v2 = sextend.i64 v1 + v3 = isub.i64 v0, v2 + return v3 +} + +; check: sgfr %r2, %r3 +; nextln: br %r14 + +function %isub_i64_imm16(i64) -> i64 { +block0(v0: i64): + v1 = iconst.i64 1 + v2 = isub.i64 v0, v1 + return v2 +} + +; check: aghi %r2, -1 +; nextln: br %r14 + +function %isub_i64_imm32(i64) -> i64 { +block0(v0: i64): + v1 = iconst.i64 32769 + v2 = isub.i64 v0, v1 + return v2 +} + +; check: agfi %r2, -32769 +; nextln: br %r14 + +function %isub_i64_mem(i64, i64) -> i64 { +block0(v0: i64, v1: i64): + v2 = load.i64 v1 + v3 = isub.i64 v0, v2 + return v3 +} + +; check: sg %r2, 0(%r3) +; nextln: br %r14 + +function %isub_i64_mem_ext16(i64, i64) -> i64 { +block0(v0: i64, v1: i64): + v2 = sload16.i64 v1 + v3 = isub.i64 v0, v2 + return v3 +} + +; check: sgh %r2, 0(%r3) +; nextln: br %r14 + +function %isub_i64_mem_ext32(i64, i64) -> i64 { +block0(v0: i64, v1: i64): + v2 = sload32.i64 v1 + v3 = isub.i64 v0, v2 + return v3 +} + +; check: sgf %r2, 0(%r3) +; nextln: br %r14 + +function %isub_i32(i32, i32) -> i32 { +block0(v0: i32, v1: i32): + v2 = isub.i32 v0, v1 + return v2 +} + +; check: sr %r2, %r3 +; nextln: br %r14 + +function %isub_i32_imm16(i32) -> i32 { +block0(v0: i32): + v1 = iconst.i32 1 + v2 = isub.i32 v0, v1 + return v2 +} + +; check: ahi %r2, -1 +; nextln: br %r14 + +function %isub_i32_imm(i32) -> i32 { +block0(v0: i32): + v1 = iconst.i32 32769 + v2 = isub.i32 v0, v1 + return v2 +} + +; check: afi %r2, -32769 +; nextln: br %r14 + +function %isub_i32_mem(i32, i64) -> i32 { +block0(v0: i32, v1: i64): + v2 = load.i32 v1 + v3 = isub.i32 v0, v2 + return v3 +} + +; check: s %r2, 0(%r3) +; nextln: br %r14 + +function %isub_i32_memoff(i32, i64) -> i32 { +block0(v0: i32, v1: i64): + v2 = load.i32 v1+4096 + v3 = isub.i32 v0, v2 + return v3 +} + +; check: sy %r2, 4096(%r3) +; nextln: br %r14 + +function %isub_i32_mem_ext16(i32, i64) -> i32 { 
+block0(v0: i32, v1: i64): + v2 = sload16.i32 v1 + v3 = isub.i32 v0, v2 + return v3 +} + +; check: sh %r2, 0(%r3) +; nextln: br %r14 + +function %isub_i32_memoff_ext16(i32, i64) -> i32 { +block0(v0: i32, v1: i64): + v2 = sload16.i32 v1+4096 + v3 = isub.i32 v0, v2 + return v3 +} + +; check: shy %r2, 4096(%r3) +; nextln: br %r14 + +function %isub_i16(i16, i16) -> i16 { +block0(v0: i16, v1: i16): + v2 = isub.i16 v0, v1 + return v2 +} + +; check: sr %r2, %r3 +; nextln: br %r14 + +function %isub_i16_imm(i16) -> i16 { +block0(v0: i16): + v1 = iconst.i16 1 + v2 = isub.i16 v0, v1 + return v2 +} + +; check: ahi %r2, -1 +; nextln: br %r14 + +function %isub_i16_mem(i16, i64) -> i16 { +block0(v0: i16, v1: i64): + v2 = load.i16 v1 + v3 = isub.i16 v0, v2 + return v3 +} + +; check: sh %r2, 0(%r3) +; nextln: br %r14 + +function %isub_i8(i8, i8) -> i8 { +block0(v0: i8, v1: i8): + v2 = isub.i8 v0, v1 + return v2 +} + +; check: sr %r2, %r3 +; nextln: br %r14 + +function %isub_i8_imm(i8) -> i8 { +block0(v0: i8): + v1 = iconst.i8 1 + v2 = isub.i8 v0, v1 + return v2 +} + +; check: ahi %r2, -1 +; nextln: br %r14 + +function %isub_i8_mem(i8, i64) -> i8 { +block0(v0: i8, v1: i64): + v2 = load.i8 v1 + v3 = isub.i8 v0, v2 + return v3 +} + +; check: llc %r3, 0(%r3) +; nextln: sr %r2, %r3 +; nextln: br %r14 + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; IABS +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +function %iabs_i64(i64) -> i64 { +block0(v0: i64): + v1 = iabs.i64 v0 + return v1 +} + +; check: lpgr %r2, %r2 +; nextln: br %r14 + +function %iabs_i64_ext32(i32) -> i64 { +block0(v0: i32): + v1 = sextend.i64 v0 + v2 = iabs.i64 v1 + return v2 +} + +; check: lpgfr %r2, %r2 +; nextln: br %r14 + +function %iabs_i32(i32) -> i32 { +block0(v0: i32): + v1 = iabs.i32 v0 + return v1 +} + +; check: lpr %r2, %r2 +; nextln: br %r14 + +function %iabs_i16(i16) -> i16 { +block0(v0: i16): + v1 = iabs.i16 v0 + return v1 +} + +; check: lhr %r2, %r2 +; nextln: lpr %r2, %r2 +; nextln: br %r14 + +function %iabs_i8(i8) -> i8 { +block0(v0: i8): + v1 = iabs.i8 v0 + return v1 +} + +; check: lbr %r2, %r2 +; nextln: lpr %r2, %r2 +; nextln: br %r14 + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; INEG +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +; FIXME: neg-abs combination not yet supported + +function %ineg_i64(i64) -> i64 { +block0(v0: i64): + v1 = ineg.i64 v0 + return v1 +} + +; check: lcgr %r2, %r2 +; nextln: br %r14 + +function %ineg_i64_ext32(i32) -> i64 { +block0(v0: i32): + v1 = sextend.i64 v0 + v2 = ineg.i64 v1 + return v2 +} + +; check: lcgfr %r2, %r2 +; nextln: br %r14 + +function %ineg_i32(i32) -> i32 { +block0(v0: i32): + v1 = ineg.i32 v0 + return v1 +} + +; check: lcr %r2, %r2 +; nextln: br %r14 + +function %ineg_i16(i16) -> i16 { +block0(v0: i16): + v1 = ineg.i16 v0 + return v1 +} + +; check: lcr %r2, %r2 +; nextln: br %r14 + +function %ineg_i8(i8) -> i8 { +block0(v0: i8): + v1 = ineg.i8 v0 + return v1 +} + +; check: lcr %r2, %r2 +; nextln: br %r14 + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; IMUL +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +function %imul_i64(i64, i64) -> i64 { +block0(v0: i64, v1: i64): + v2 = imul.i64 v0, v1 + return v2 +} + +; check: msgr %r2, %r3 +; nextln: br %r14 + +function %imul_i64_imm16(i64) -> i64 { +block0(v0: i64): + v1 = iconst.i64 3 + v2 = imul.i64 v0, v1 + return v2 +} + +; check: mghi %r2, 3 +; nextln: br %r14 + +function %imul_i64_imm32(i64) -> i64 { +block0(v0: i64): + v1 = iconst.i64 32769 + v2 = imul.i64 v0, v1 + return v2 +} + +; check: msgfi %r2, 32769 +; nextln: br %r14 
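+
+; Note: the immediate forms above are expected to pick the narrowest
+; encoding that fits the constant: mghi for a 16-bit signed immediate,
+; falling back to msgfi for a full 32-bit one, mirroring the aghi/agfi
+; and ahi/afi selection in the IADD and ISUB tests earlier in this file.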
+ +function %imul_i64_mem(i64, i64) -> i64 { +block0(v0: i64, v1: i64): + v2 = load.i64 v1 + v3 = imul.i64 v0, v2 + return v3 +} + +; check: msg %r2, 0(%r3) +; nextln: br %r14 + +function %imul_i64_mem_ext16(i64, i64) -> i64 { +block0(v0: i64, v1: i64): + v2 = sload16.i64 v1 + v3 = imul.i64 v0, v2 + return v3 +} + +; check: mgh %r2, 0(%r3) +; nextln: br %r14 + +function %imul_i64_mem_ext32(i64, i64) -> i64 { +block0(v0: i64, v1: i64): + v2 = sload32.i64 v1 + v3 = imul.i64 v0, v2 + return v3 +} + +; check: msgf %r2, 0(%r3) +; nextln: br %r14 + +function %imul_i32(i32, i32) -> i32 { +block0(v0: i32, v1: i32): + v2 = imul.i32 v0, v1 + return v2 +} + +; check: msr %r2, %r3 +; nextln: br %r14 + +function %imul_i32_imm16(i32) -> i32 { +block0(v0: i32): + v1 = iconst.i32 3 + v2 = imul.i32 v0, v1 + return v2 +} + +; check: mhi %r2, 3 +; nextln: br %r14 + +function %imul_i32_imm32(i32) -> i32 { +block0(v0: i32): + v1 = iconst.i32 32769 + v2 = imul.i32 v0, v1 + return v2 +} + +; check: msfi %r2, 32769 +; nextln: br %r14 + +function %imul_i32_mem(i32, i64) -> i32 { +block0(v0: i32, v1: i64): + v2 = load.i32 v1 + v3 = imul.i32 v0, v2 + return v3 +} + +; check: ms %r2, 0(%r3) +; nextln: br %r14 + +function %imul_i32_memoff(i32, i64) -> i32 { +block0(v0: i32, v1: i64): + v2 = load.i32 v1+4096 + v3 = imul.i32 v0, v2 + return v3 +} + +; check: msy %r2, 4096(%r3) +; nextln: br %r14 + +function %imul_i32_mem_ext16(i32, i64) -> i32 { +block0(v0: i32, v1: i64): + v2 = sload16.i32 v1 + v3 = imul.i32 v0, v2 + return v3 +} + +; check: mh %r2, 0(%r3) +; nextln: br %r14 + +function %imul_i32_memoff_ext16(i32, i64) -> i32 { +block0(v0: i32, v1: i64): + v2 = sload16.i32 v1+4096 + v3 = imul.i32 v0, v2 + return v3 +} + +; check: mhy %r2, 4096(%r3) +; nextln: br %r14 + +function %imul_i16(i16, i16) -> i16 { +block0(v0: i16, v1: i16): + v2 = imul.i16 v0, v1 + return v2 +} + +; check: msr %r2, %r3 +; nextln: br %r14 + +function %imul_i16_imm(i16) -> i16 { +block0(v0: i16): + v1 = iconst.i16 3 + v2 = imul.i16 v0, v1 + return v2 +} + +; check: mhi %r2, 3 +; nextln: br %r14 + +function %imul_i16_mem(i16, i64) -> i16 { +block0(v0: i16, v1: i64): + v2 = load.i16 v1 + v3 = imul.i16 v0, v2 + return v3 +} + +; check: mh %r2, 0(%r3) +; nextln: br %r14 + +function %imul_i8(i8, i8) -> i8 { +block0(v0: i8, v1: i8): + v2 = imul.i8 v0, v1 + return v2 +} + +; check: msr %r2, %r3 +; nextln: br %r14 + +function %imul_i8_imm(i8) -> i8 { +block0(v0: i8): + v1 = iconst.i8 3 + v2 = imul.i8 v0, v1 + return v2 +} + +; check: mhi %r2, 3 +; nextln: br %r14 + +function %imul_i8_mem(i8, i64) -> i8 { +block0(v0: i8, v1: i64): + v2 = load.i8 v1 + v3 = imul.i8 v0, v2 + return v3 +} + +; check: llc %r3, 0(%r3) +; nextln: msr %r2, %r3 +; nextln: br %r14 + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; UMULHI +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +function %umulhi_i64(i64, i64) -> i64 { +block0(v0: i64, v1: i64): + v2 = umulhi.i64 v0, v1 + return v2 +} + +; check: lgr %r1, %r3 +; nextln: mlgr %r0, %r2 +; nextln: lgr %r2, %r0 +; nextln: br %r14 + +function %umulhi_i32(i32, i32) -> i32 { +block0(v0: i32, v1: i32): + v2 = umulhi.i32 v0, v1 + return v2 +} + +; check: llgfr %r2, %r2 +; nextln: llgfr %r3, %r3 +; nextln: msgr %r2, %r3 +; nextln: srlg %r2, %r2, 32 +; nextln: br %r14 + +function %umulhi_i16(i16, i16) -> i16 { +block0(v0: i16, v1: i16): + v2 = umulhi.i16 v0, v1 + return v2 +} + +; check: llhr %r2, %r2 +; nextln: llhr %r3, %r3 +; nextln: msr %r2, %r3 +; nextln: srlk %r2, %r2, 16 +; nextln: br %r14 + +function %umulhi_i8(i8, i8) -> i8 { 
+block0(v0: i8, v1: i8): + v2 = umulhi.i8 v0, v1 + return v2 +} + +; check: llcr %r2, %r2 +; nextln: llcr %r3, %r3 +; nextln: msr %r2, %r3 +; nextln: srlk %r2, %r2, 8 +; nextln: br %r14 + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; SMULHI +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +function %smulhi_i64(i64, i64) -> i64 { +block0(v0: i64, v1: i64): + v2 = smulhi.i64 v0, v1 + return v2 +} + +; check: mgrk %r0, %r2, %r3 +; nextln: lgr %r2, %r0 +; nextln: br %r14 + +function %smulhi_i32(i32, i32) -> i32 { +block0(v0: i32, v1: i32): + v2 = smulhi.i32 v0, v1 + return v2 +} + +; check: lgfr %r2, %r2 +; nextln: lgfr %r3, %r3 +; nextln: msgr %r2, %r3 +; nextln: srag %r2, %r2, 32 +; nextln: br %r14 + +function %smulhi_i16(i16, i16) -> i16 { +block0(v0: i16, v1: i16): + v2 = smulhi.i16 v0, v1 + return v2 +} + +; check: lhr %r2, %r2 +; nextln: lhr %r3, %r3 +; nextln: msr %r2, %r3 +; nextln: srak %r2, %r2, 16 +; nextln: br %r14 + +function %smulhi_i8(i8, i8) -> i8 { +block0(v0: i8, v1: i8): + v2 = smulhi.i8 v0, v1 + return v2 +} + +; check: lbr %r2, %r2 +; nextln: lbr %r3, %r3 +; nextln: msr %r2, %r3 +; nextln: srak %r2, %r2, 8 +; nextln: br %r14 + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; SDIV +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +function %sdiv_i64(i64, i64) -> i64 { +block0(v0: i64, v1: i64): + v2 = sdiv.i64 v0, v1 + return v2 +} + +; check: lgr %r1, %r2 +; nextln: llihf %r2, 2147483647 +; nextln: iilf %r2, 4294967295 +; nextln: xgr %r2, %r1 +; nextln: ngr %r2, %r3 +; nextln: cgite %r2, -1 +; nextln: dsgr %r0, %r3 +; nextln: lgr %r2, %r1 +; nextln: br %r14 + +function %sdiv_i64_imm(i64) -> i64 { +block0(v0: i64): + v1 = iconst.i64 2 + v2 = sdiv.i64 v0, v1 + return v2 +} + +; check: lgr %r1, %r2 +; nextln: lghi %r2, 2 +; nextln: dsgr %r0, %r2 +; nextln: lgr %r2, %r1 +; nextln: br %r14 + +function %sdiv_i32(i32, i32) -> i32 { +block0(v0: i32, v1: i32): + v2 = sdiv.i32 v0, v1 + return v2 +} + +; check: lgfr %r1, %r2 +; nextln: iilf %r2, 2147483647 +; nextln: xr %r2, %r1 +; nextln: nr %r2, %r3 +; nextln: cite %r2, -1 +; nextln: dsgfr %r0, %r3 +; nextln: lr %r2, %r1 +; nextln: br %r14 + +function %sdiv_i32_imm(i32) -> i32 { +block0(v0: i32): + v1 = iconst.i32 2 + v2 = sdiv.i32 v0, v1 + return v2 +} + +; check: lgfr %r1, %r2 +; nextln: lhi %r2, 2 +; nextln: dsgfr %r0, %r2 +; nextln: lr %r2, %r1 +; nextln: br %r14 + +function %sdiv_i16(i16, i16) -> i16 { +block0(v0: i16, v1: i16): + v2 = sdiv.i16 v0, v1 + return v2 +} + +; check: lghr %r1, %r2 +; nextln: lhr %r2, %r3 +; nextln: lhi %r3, 32767 +; nextln: xr %r3, %r1 +; nextln: nr %r3, %r2 +; nextln: cite %r3, -1 +; nextln: dsgfr %r0, %r2 +; nextln: lr %r2, %r1 +; nextln: br %r14 + +function %sdiv_i16_imm(i16) -> i16 { +block0(v0: i16): + v1 = iconst.i16 2 + v2 = sdiv.i16 v0, v1 + return v2 +} + +; check: lghr %r1, %r2 +; nextln: lhi %r2, 2 +; nextln: dsgfr %r0, %r2 +; nextln: lr %r2, %r1 +; nextln: br %r14 + +function %sdiv_i8(i8, i8) -> i8 { +block0(v0: i8, v1: i8): + v2 = sdiv.i8 v0, v1 + return v2 +} + +; check: lgbr %r1, %r2 +; nextln: lbr %r2, %r3 +; nextln: lhi %r3, 127 +; nextln: xr %r3, %r1 +; nextln: nr %r3, %r2 +; nextln: cite %r3, -1 +; nextln: dsgfr %r0, %r2 +; nextln: lr %r2, %r1 +; nextln: br %r14 + +function %sdiv_i8_imm(i8) -> i8 { +block0(v0: i8): + v1 = iconst.i8 2 + v2 = sdiv.i8 v0, v1 + return v2 +} + +; check: lgbr %r1, %r2 +; nextln: lhi %r2, 2 +; nextln: dsgfr %r0, %r2 +; nextln: lr %r2, %r1 +; nextln: br %r14 + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; UDIV +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; 
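+; The unsigned divides below all share one pattern: zero the high half
+; of the even/odd register pair (%r0), move the dividend into %r1,
+; divide with dlgr/dlr, and copy the quotient back out of %r1 (the
+; UREM tests further down instead copy the remainder out of %r0).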
+ +function %udiv_i64(i64, i64) -> i64 { +block0(v0: i64, v1: i64): + v2 = udiv.i64 v0, v1 + return v2 +} + +; check: lghi %r0, 0 +; nextln: lgr %r1, %r2 +; nextln: dlgr %r0, %r3 +; nextln: lgr %r2, %r1 +; nextln: br %r14 + +function %udiv_i64_imm(i64) -> i64 { +block0(v0: i64): + v1 = iconst.i64 2 + v2 = udiv.i64 v0, v1 + return v2 +} + +; check: lghi %r0, 0 +; nextln: lgr %r1, %r2 +; nextln: lghi %r2, 2 +; nextln: dlgr %r0, %r2 +; nextln: lgr %r2, %r1 +; nextln: br %r14 + +function %udiv_i32(i32, i32) -> i32 { +block0(v0: i32, v1: i32): + v2 = udiv.i32 v0, v1 + return v2 +} + +; check: lhi %r0, 0 +; nextln: lr %r1, %r2 +; nextln: dlr %r0, %r3 +; nextln: lr %r2, %r1 +; nextln: br %r14 + +function %udiv_i32_imm(i32) -> i32 { +block0(v0: i32): + v1 = iconst.i32 2 + v2 = udiv.i32 v0, v1 + return v2 +} + +; check: lhi %r0, 0 +; nextln: lr %r1, %r2 +; nextln: lhi %r2, 2 +; nextln: dlr %r0, %r2 +; nextln: lr %r2, %r1 +; nextln: br %r14 + +function %udiv_i16(i16, i16) -> i16 { +block0(v0: i16, v1: i16): + v2 = udiv.i16 v0, v1 + return v2 +} + +; check: lhi %r0, 0 +; nextln: llhr %r1, %r2 +; nextln: llhr %r2, %r3 +; nextln: dlr %r0, %r2 +; nextln: lr %r2, %r1 +; nextln: br %r14 + +function %udiv_i16_imm(i16) -> i16 { +block0(v0: i16): + v1 = iconst.i16 2 + v2 = udiv.i16 v0, v1 + return v2 +} + +; check: lhi %r0, 0 +; nextln: llhr %r1, %r2 +; nextln: lhi %r2, 2 +; nextln: dlr %r0, %r2 +; nextln: lr %r2, %r1 +; nextln: br %r14 + +function %udiv_i8(i8, i8) -> i8 { +block0(v0: i8, v1: i8): + v2 = udiv.i8 v0, v1 + return v2 +} + +; check: lhi %r0, 0 +; nextln: llcr %r1, %r2 +; nextln: llcr %r2, %r3 +; nextln: dlr %r0, %r2 +; nextln: lr %r2, %r1 +; nextln: br %r14 + +function %udiv_i8_imm(i8) -> i8 { +block0(v0: i8): + v1 = iconst.i8 2 + v2 = udiv.i8 v0, v1 + return v2 +} + +; check: lhi %r0, 0 +; nextln: llcr %r1, %r2 +; nextln: lhi %r2, 2 +; nextln: dlr %r0, %r2 +; nextln: lr %r2, %r1 +; nextln: br %r14 + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; SREM +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +function %srem_i64(i64, i64) -> i64 { +block0(v0: i64, v1: i64): + v2 = srem.i64 v0, v1 + return v2 +} + +; check: lgr %r1, %r2 +; nextln: cghi %r3, -1 +; nextln: locghie %r1, 0 +; nextln: dsgr %r0, %r3 +; nextln: lgr %r2, %r0 +; nextln: br %r14 + +function %srem_i32(i32, i32) -> i32 { +block0(v0: i32, v1: i32): + v2 = srem.i32 v0, v1 + return v2 +} + +; check: lgfr %r1, %r2 +; nextln: dsgfr %r0, %r3 +; nextln: lr %r2, %r0 +; nextln: br %r14 + +function %srem_i16(i16, i16) -> i16 { +block0(v0: i16, v1: i16): + v2 = srem.i16 v0, v1 + return v2 +} + +; check: lghr %r1, %r2 +; nextln: lhr %r2, %r3 +; nextln: dsgfr %r0, %r2 +; nextln: lr %r2, %r0 +; nextln: br %r14 + +function %srem_i8(i8, i8) -> i8 { +block0(v0: i8, v1: i8): + v2 = srem.i8 v0, v1 + return v2 +} + +; check: lgbr %r1, %r2 +; nextln: lbr %r2, %r3 +; nextln: dsgfr %r0, %r2 +; nextln: lr %r2, %r0 +; nextln: br %r14 + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; UREM +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +function %urem_i64(i64, i64) -> i64 { +block0(v0: i64, v1: i64): + v2 = urem.i64 v0, v1 + return v2 +} + +; check: lghi %r0, 0 +; nextln: lgr %r1, %r2 +; nextln: dlgr %r0, %r3 +; nextln: lgr %r2, %r0 +; nextln: br %r14 + +function %urem_i32(i32, i32) -> i32 { +block0(v0: i32, v1: i32): + v2 = urem.i32 v0, v1 + return v2 +} + +; check: lhi %r0, 0 +; nextln: lr %r1, %r2 +; nextln: dlr %r0, %r3 +; nextln: lr %r2, %r0 +; nextln: br %r14 + +function %urem_i16(i16, i16) -> i16 { +block0(v0: i16, v1: i16): + v2 = urem.i16 v0, v1 + 
return v2 +} + +; check: lhi %r0, 0 +; check: llhr %r1, %r2 +; nextln: llhr %r2, %r3 +; nextln: dlr %r0, %r2 +; nextln: lr %r2, %r0 +; nextln: br %r14 + +function %urem_i8(i8, i8) -> i8 { +block0(v0: i8, v1: i8): + v2 = urem.i8 v0, v1 + return v2 +} + +; check: lhi %r0, 0 +; check: llcr %r1, %r2 +; nextln: llcr %r2, %r3 +; nextln: dlr %r0, %r2 +; nextln: lr %r2, %r0 +; nextln: br %r14 + diff --git a/cranelift/filetests/filetests/isa/s390x/bitops.clif b/cranelift/filetests/filetests/isa/s390x/bitops.clif new file mode 100644 index 0000000000..e4ab2f1f4b --- /dev/null +++ b/cranelift/filetests/filetests/isa/s390x/bitops.clif @@ -0,0 +1,243 @@ +test compile +target s390x + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; BITREV +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +; FIXME: bitrev not yet implemented + +;function %bitrev_i64(i64) -> i64 { +;block0(v0: i64): +; v1 = bitrev v0 +; return v1 +;} +; +;function %bitrev_i32(i32) -> i32 { +;block0(v0: i32): +; v1 = bitrev v0 +; return v1 +;} +; +;function %bitrev_i16(i16) -> i16 { +;block0(v0: i16): +; v1 = bitrev v0 +; return v1 +;} +; +;function %bitrev_i8(i8) -> i8 { +;block0(v0: i8): +; v1 = bitrev v0 +; return v1 +;} +; + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; CLZ +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +function %clz_i64(i64) -> i64 { +block0(v0: i64): + v1 = clz v0 + return v1 +} + +; check: flogr %r0, %r2 +; nextln: lgr %r2, %r0 +; nextln: br %r14 + +function %clz_i32(i32) -> i32 { +block0(v0: i32): + v1 = clz v0 + return v1 +} + +; check: llgfr %r2, %r2 +; nextln: flogr %r0, %r2 +; nextln: lr %r2, %r0 +; nextln: ahi %r2, -32 +; nextln: br %r14 + +function %clz_i16(i16) -> i16 { +block0(v0: i16): + v1 = clz v0 + return v1 +} + +; check: llghr %r2, %r2 +; nextln: flogr %r0, %r2 +; nextln: lr %r2, %r0 +; nextln: ahi %r2, -48 +; nextln: br %r14 + +function %clz_i8(i8) -> i8 { +block0(v0: i8): + v1 = clz v0 + return v1 +} + +; check: llgcr %r2, %r2 +; nextln: flogr %r0, %r2 +; nextln: lr %r2, %r0 +; nextln: ahi %r2, -56 +; nextln: br %r14 + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; CLS +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +function %cls_i64(i64) -> i64 { +block0(v0: i64): + v1 = cls v0 + return v1 +} + +; check: srag %r3, %r2, 63 +; nextln: xgr %r3, %r2 +; nextln: flogr %r0, %r2 +; nextln: lgr %r2, %r0 +; nextln: br %r14 + +function %cls_i32(i32) -> i32 { +block0(v0: i32): + v1 = cls v0 + return v1 +} + +; check: lgfr %r2, %r2 +; nextln: srag %r3, %r2, 63 +; nextln: xgr %r3, %r2 +; nextln: flogr %r0, %r2 +; nextln: lr %r2, %r0 +; nextln: ahi %r2, -32 +; nextln: br %r14 + +function %cls_i16(i16) -> i16 { +block0(v0: i16): + v1 = cls v0 + return v1 +} + +; check: lghr %r2, %r2 +; nextln: srag %r3, %r2, 63 +; nextln: xgr %r3, %r2 +; nextln: flogr %r0, %r2 +; nextln: lr %r2, %r0 +; nextln: ahi %r2, -48 +; nextln: br %r14 + +function %cls_i8(i8) -> i8 { +block0(v0: i8): + v1 = cls v0 + return v1 +} + +; check: lgbr %r2, %r2 +; nextln: srag %r3, %r2, 63 +; nextln: xgr %r3, %r2 +; nextln: flogr %r0, %r2 +; nextln: lr %r2, %r0 +; nextln: ahi %r2, -56 +; nextln: br %r14 + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; CTZ +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +function %ctz_i64(i64) -> i64 { +block0(v0: i64): + v1 = ctz v0 + return v1 +} + +; check: lcgr %r3, %r2 +; nextln: ngrk %r2, %r3, %r2 +; nextln: flogr %r0, %r2 +; nextln: locghie %r0, -1 +; nextln: lghi %r2, 63 +; nextln: sgr %r2, %r0 +; nextln: br %r14 + +function %ctz_i32(i32) -> i32 { +block0(v0: i32): + v1 = ctz v0 + return v1 +} + +; 
check: oihl %r2, 1 +; nextln: lcgr %r3, %r2 +; nextln: ngrk %r2, %r3, %r2 +; nextln: flogr %r0, %r2 +; nextln: lhi %r2, 63 +; nextln: sr %r2, %r0 +; nextln: br %r14 + +function %ctz_i16(i16) -> i16 { +block0(v0: i16): + v1 = ctz v0 + return v1 +} + +; check: oilh %r2, 1 +; nextln: lcgr %r3, %r2 +; nextln: ngrk %r2, %r3, %r2 +; nextln: flogr %r0, %r2 +; nextln: lhi %r2, 63 +; nextln: sr %r2, %r0 +; nextln: br %r14 + +function %ctz_i8(i8) -> i8 { +block0(v0: i8): + v1 = ctz v0 + return v1 +} + +; check: oill %r2, 256 +; nextln: lcgr %r3, %r2 +; nextln: ngrk %r2, %r3, %r2 +; nextln: flogr %r0, %r2 +; nextln: lhi %r2, 63 +; nextln: sr %r2, %r0 +; nextln: br %r14 + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; POPCNT +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +function %popcnt_i64(i64) -> i64 { +block0(v0: i64): + v1 = popcnt v0 + return v1 +} + +; check: popcnt %r2, %r2, 8 +; nextln: br %r14 + +function %popcnt_i32(i32) -> i32 { +block0(v0: i32): + v1 = popcnt v0 + return v1 +} + +; check: llgfr %r2, %r2 +; nextln: popcnt %r2, %r2, 8 +; nextln: br %r14 + +function %popcnt_i16(i16) -> i16 { +block0(v0: i16): + v1 = popcnt v0 + return v1 +} + +; check: llghr %r2, %r2 +; nextln: popcnt %r2, %r2, 8 +; nextln: br %r14 + +function %popcnt_i8(i8) -> i8 { +block0(v0: i8): + v1 = popcnt v0 + return v1 +} + +; check: popcnt %r2, %r2 +; nextln: br %r14 diff --git a/cranelift/filetests/filetests/isa/s390x/bitwise.clif b/cranelift/filetests/filetests/isa/s390x/bitwise.clif new file mode 100644 index 0000000000..dd1e75cf2f --- /dev/null +++ b/cranelift/filetests/filetests/isa/s390x/bitwise.clif @@ -0,0 +1,490 @@ + +test compile +target s390x + +; FIXME: add immediate operand versions + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; BAND +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +function %band_i64(i64, i64) -> i64 { +block0(v0: i64, v1: i64): + v2 = band.i64 v0, v1 + return v2 +} + +; check: ngr %r2, %r3 +; nextln: br %r14 + +function %band_i64_mem(i64, i64) -> i64 { +block0(v0: i64, v1: i64): + v2 = load.i64 v1 + v3 = band.i64 v0, v2 + return v3 +} + +; check: ng %r2, 0(%r3) +; nextln: br %r14 + +function %band_i32(i32, i32) -> i32 { +block0(v0: i32, v1: i32): + v2 = band.i32 v0, v1 + return v2 +} + +; check: nr %r2, %r3 +; nextln: br %r14 + +function %band_i32_mem(i32, i64) -> i32 { +block0(v0: i32, v1: i64): + v2 = load.i32 v1 + v3 = band.i32 v0, v2 + return v3 +} + +; check: n %r2, 0(%r3) +; nextln: br %r14 + +function %band_i32_memoff(i32, i64) -> i32 { +block0(v0: i32, v1: i64): + v2 = load.i32 v1+4096 + v3 = band.i32 v0, v2 + return v3 +} + +; check: ny %r2, 4096(%r3) +; nextln: br %r14 + +function %band_i16(i16, i16) -> i16 { +block0(v0: i16, v1: i16): + v2 = band.i16 v0, v1 + return v2 +} + +; check: nr %r2, %r3 +; nextln: br %r14 + +function %band_i16_mem(i16, i64) -> i16 { +block0(v0: i16, v1: i64): + v2 = load.i16 v1 + v3 = band.i16 v0, v2 + return v3 +} + +; check: llh %r3, 0(%r3) +; nextln: nr %r2, %r3 +; nextln: br %r14 + +function %band_i8(i8, i8) -> i8 { +block0(v0: i8, v1: i8): + v2 = band.i8 v0, v1 + return v2 +} + +; check: nr %r2, %r3 +; nextln: br %r14 + +function %band_i8_mem(i8, i64) -> i8 { +block0(v0: i8, v1: i64): + v2 = load.i8 v1 + v3 = band.i8 v0, v2 + return v3 +} + +; check: llc %r3, 0(%r3) +; nextln: nr %r2, %r3 +; nextln: br %r14 + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; BOR +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +function %bor_i64(i64, i64) -> i64 { +block0(v0: i64, v1: i64): + v2 = bor.i64 v0, v1 + return v2 +} + +; check: ogr %r2, %r3 +; 
nextln: br %r14 + +function %bor_i64_mem(i64, i64) -> i64 { +block0(v0: i64, v1: i64): + v2 = load.i64 v1 + v3 = bor.i64 v0, v2 + return v3 +} + +; check: og %r2, 0(%r3) +; nextln: br %r14 + +function %bor_i32(i32, i32) -> i32 { +block0(v0: i32, v1: i32): + v2 = bor.i32 v0, v1 + return v2 +} + +; check: or %r2, %r3 +; nextln: br %r14 + +function %bor_i32_mem(i32, i64) -> i32 { +block0(v0: i32, v1: i64): + v2 = load.i32 v1 + v3 = bor.i32 v0, v2 + return v3 +} + +; check: o %r2, 0(%r3) +; nextln: br %r14 + +function %bor_i32_memoff(i32, i64) -> i32 { +block0(v0: i32, v1: i64): + v2 = load.i32 v1+4096 + v3 = bor.i32 v0, v2 + return v3 +} + +; check: oy %r2, 4096(%r3) +; nextln: br %r14 + +function %bor_i16(i16, i16) -> i16 { +block0(v0: i16, v1: i16): + v2 = bor.i16 v0, v1 + return v2 +} + +; check: or %r2, %r3 +; nextln: br %r14 + +function %bor_i16_mem(i16, i64) -> i16 { +block0(v0: i16, v1: i64): + v2 = load.i16 v1 + v3 = bor.i16 v0, v2 + return v3 +} + +; check: llh %r3, 0(%r3) +; nextln: or %r2, %r3 +; nextln: br %r14 + +function %bor_i8(i8, i8) -> i8 { +block0(v0: i8, v1: i8): + v2 = bor.i8 v0, v1 + return v2 +} + +; check: or %r2, %r3 +; nextln: br %r14 + +function %bor_i8_mem(i8, i64) -> i8 { +block0(v0: i8, v1: i64): + v2 = load.i8 v1 + v3 = bor.i8 v0, v2 + return v3 +} + +; check: llc %r3, 0(%r3) +; nextln: or %r2, %r3 +; nextln: br %r14 + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; BXOR +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +function %bxor_i64(i64, i64) -> i64 { +block0(v0: i64, v1: i64): + v2 = bxor.i64 v0, v1 + return v2 +} + +; check: xgr %r2, %r3 +; nextln: br %r14 + +function %bxor_i64_mem(i64, i64) -> i64 { +block0(v0: i64, v1: i64): + v2 = load.i64 v1 + v3 = bxor.i64 v0, v2 + return v3 +} + +; check: xg %r2, 0(%r3) +; nextln: br %r14 + +function %bxor_i32(i32, i32) -> i32 { +block0(v0: i32, v1: i32): + v2 = bxor.i32 v0, v1 + return v2 +} + +; check: xr %r2, %r3 +; nextln: br %r14 + +function %bxor_i32_mem(i32, i64) -> i32 { +block0(v0: i32, v1: i64): + v2 = load.i32 v1 + v3 = bxor.i32 v0, v2 + return v3 +} + +; check: x %r2, 0(%r3) +; nextln: br %r14 + +function %bxor_i32_memoff(i32, i64) -> i32 { +block0(v0: i32, v1: i64): + v2 = load.i32 v1+4096 + v3 = bxor.i32 v0, v2 + return v3 +} + +; check: xy %r2, 4096(%r3) +; nextln: br %r14 + +function %bxor_i16(i16, i16) -> i16 { +block0(v0: i16, v1: i16): + v2 = bxor.i16 v0, v1 + return v2 +} + +; check: xr %r2, %r3 +; nextln: br %r14 + +function %bxor_i16_mem(i16, i64) -> i16 { +block0(v0: i16, v1: i64): + v2 = load.i16 v1 + v3 = bxor.i16 v0, v2 + return v3 +} + +; check: llh %r3, 0(%r3) +; nextln: xr %r2, %r3 +; nextln: br %r14 + +function %bxor_i8(i8, i8) -> i8 { +block0(v0: i8, v1: i8): + v2 = bxor.i8 v0, v1 + return v2 +} + +; check: xr %r2, %r3 +; nextln: br %r14 + +function %bxor_i8_mem(i8, i64) -> i8 { +block0(v0: i8, v1: i64): + v2 = load.i8 v1 + v3 = bxor.i8 v0, v2 + return v3 +} + +; check: llc %r3, 0(%r3) +; nextln: xr %r2, %r3 +; nextln: br %r14 + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; BAND_NOT +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +function %band_not_i64(i64, i64) -> i64 { +block0(v0: i64, v1: i64): + v2 = band_not.i64 v0, v1 + return v2 +} + +; check: nngrk %r2, %r2, %r3 +; nextln: br %r14 + +function %band_not_i32(i32, i32) -> i32 { +block0(v0: i32, v1: i32): + v2 = band_not.i32 v0, v1 + return v2 +} + +; check: nnrk %r2, %r2, %r3 +; nextln: br %r14 + +function %band_not_i16(i16, i16) -> i16 { +block0(v0: i16, v1: i16): + v2 = band_not.i16 v0, v1 + return v2 +} + +; check: nnrk %r2, 
%r2, %r3 +; nextln: br %r14 + +function %band_not_i8(i8, i8) -> i8 { +block0(v0: i8, v1: i8): + v2 = band_not.i8 v0, v1 + return v2 +} + +; check: nnrk %r2, %r2, %r3 +; nextln: br %r14 + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; BOR_NOT +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +function %bor_not_i64(i64, i64) -> i64 { +block0(v0: i64, v1: i64): + v2 = bor_not.i64 v0, v1 + return v2 +} + +; check: nogrk %r2, %r2, %r3 +; nextln: br %r14 + +function %bor_not_i32(i32, i32) -> i32 { +block0(v0: i32, v1: i32): + v2 = bor_not.i32 v0, v1 + return v2 +} + +; check: nork %r2, %r2, %r3 +; nextln: br %r14 + +function %bor_not_i16(i16, i16) -> i16 { +block0(v0: i16, v1: i16): + v2 = bor_not.i16 v0, v1 + return v2 +} + +; check: nork %r2, %r2, %r3 +; nextln: br %r14 + +function %bor_not_i8(i8, i8) -> i8 { +block0(v0: i8, v1: i8): + v2 = bor_not.i8 v0, v1 + return v2 +} + +; check: nork %r2, %r2, %r3 +; nextln: br %r14 + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; BXOR_NOT +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +function %bxor_not_i64(i64, i64) -> i64 { +block0(v0: i64, v1: i64): + v2 = bxor_not.i64 v0, v1 + return v2 +} + +; check: nxgrk %r2, %r2, %r3 +; nextln: br %r14 + +function %bxor_not_i32(i32, i32) -> i32 { +block0(v0: i32, v1: i32): + v2 = bxor_not.i32 v0, v1 + return v2 +} + +; check: nxrk %r2, %r2, %r3 +; nextln: br %r14 + +function %bxor_not_i16(i16, i16) -> i16 { +block0(v0: i16, v1: i16): + v2 = bxor_not.i16 v0, v1 + return v2 +} + +; check: nxrk %r2, %r2, %r3 +; nextln: br %r14 + +function %bxor_not_i8(i8, i8) -> i8 { +block0(v0: i8, v1: i8): + v2 = bxor_not.i8 v0, v1 + return v2 +} + +; check: nxrk %r2, %r2, %r3 +; nextln: br %r14 + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; BNOT +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +function %bnot_i64(i64) -> i64 { +block0(v0: i64): + v1 = bnot.i64 v0 + return v1 +} + +; check: nogrk %r2, %r2, %r2 +; nextln: br %r14 + +function %bnot_i32(i32) -> i32 { +block0(v0: i32): + v1 = bnot.i32 v0 + return v1 +} + +; check: nork %r2, %r2, %r2 +; nextln: br %r14 + +function %bnot_i16(i16) -> i16 { +block0(v0: i16): + v1 = bnot.i16 v0 + return v1 +} + +; check: nork %r2, %r2, %r2 +; nextln: br %r14 + +function %bnot_i8(i8) -> i8 { +block0(v0: i8): + v1 = bnot.i8 v0 + return v1 +} + +; check: nork %r2, %r2, %r2 +; nextln: br %r14 + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; BITSELECT +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +function %bitselect_i64(i64, i64, i64) -> i64 { +block0(v0: i64, v1: i64, v2: i64): + v3 = bitselect.i64 v0, v1, v2 + return v3 +} + +; check: ngr %r3, %r2 +; nextln: nngrk %r2, %r4, %r2 +; nextln: ogr %r2, %r3 +; nextln: br %r14 + +function %bitselect_i32(i32, i32, i32) -> i32 { +block0(v0: i32, v1: i32, v2: i32): + v3 = bitselect.i32 v0, v1, v2 + return v3 +} + +; check: nr %r3, %r2 +; nextln: nnrk %r2, %r4, %r2 +; nextln: or %r2, %r3 +; nextln: br %r14 + +function %bitselect_i16(i16, i16, i16) -> i16 { +block0(v0: i16, v1: i16, v2: i16): + v3 = bitselect.i16 v0, v1, v2 + return v3 +} + +; check: nr %r3, %r2 +; nextln: nnrk %r2, %r4, %r2 +; nextln: or %r2, %r3 +; nextln: br %r14 + +function %bitselect_i8(i8, i8, i8) -> i8 { +block0(v0: i8, v1: i8, v2: i8): + v3 = bitselect.i8 v0, v1, v2 + return v3 +} + +; check: nr %r3, %r2 +; nextln: nnrk %r2, %r4, %r2 +; nextln: or %r2, %r3 +; nextln: br %r14 + diff --git a/cranelift/filetests/filetests/isa/s390x/call.clif b/cranelift/filetests/filetests/isa/s390x/call.clif new file mode 100644 index 0000000000..4fee8cf9f8 --- /dev/null +++ 
b/cranelift/filetests/filetests/isa/s390x/call.clif @@ -0,0 +1,113 @@ +test compile +target s390x + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; CALL +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +function %call(i64) -> i64 { + fn0 = %g(i64) -> i64 + +block0(v0: i64): + v1 = call fn0(v0) + return v1 +} + +; check: stmg %r14, %r15, 112(%r15) +; nextln: aghi %r15, -160 +; nextln: virtual_sp_offset_adjust 160 +; nextln: bras %r1, 12 ; data %g + 0 ; lg %r3, 0(%r1) +; nextln: basr %r14, %r3 +; nextln: lmg %r14, %r15, 272(%r15) +; nextln: br %r14 + +function %call_uext(i32) -> i64 { + fn0 = %g(i32 uext) -> i64 + +block0(v0: i32): + v1 = call fn0(v0) + return v1 +} + +; check: stmg %r14, %r15, 112(%r15) +; nextln: aghi %r15, -160 +; nextln: virtual_sp_offset_adjust 160 +; nextln: llgfr %r2, %r2 +; nextln: bras %r1, 12 ; data %g + 0 ; lg %r3, 0(%r1) +; nextln: basr %r14, %r3 +; nextln: lmg %r14, %r15, 272(%r15) +; nextln: br %r14 + +function %ret_uext(i32) -> i32 uext { +block0(v0: i32): + return v0 +} + +; check: llgfr %r2, %r2 +; nextln: br %r14 + +function %call_sext(i32) -> i64 { + fn0 = %g(i32 sext) -> i64 + +block0(v0: i32): + v1 = call fn0(v0) + return v1 +} + +; check: stmg %r14, %r15, 112(%r15) +; nextln: aghi %r15, -160 +; nextln: virtual_sp_offset_adjust 160 +; nextln: lgfr %r2, %r2 +; nextln: bras %r1, 12 ; data %g + 0 ; lg %r3, 0(%r1) +; nextln: basr %r14, %r3 +; nextln: lmg %r14, %r15, 272(%r15) +; nextln: br %r14 + +function %ret_sext(i32) -> i32 sext { +block0(v0: i32): + return v0 +} + +; check: lgfr %r2, %r2 +; nextln: br %r14 + +function %call_colocated(i64) -> i64 { + fn0 = colocated %g(i64) -> i64 + +block0(v0: i64): + v1 = call fn0(v0) + return v1 +} + +; check: stmg %r14, %r15, 112(%r15) +; nextln: aghi %r15, -160 +; nextln: virtual_sp_offset_adjust 160 +; nextln: brasl %r14, %g +; nextln: lmg %r14, %r15, 272(%r15) +; nextln: br %r14 + +function %f2(i32) -> i64 { + fn0 = %g(i32 uext) -> i64 + +block0(v0: i32): + v1 = call fn0(v0) + return v1 +} + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; CALL_INDIRECT +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +function %call_indirect(i64, i64) -> i64 { + sig0 = (i64) -> i64 +block0(v0: i64, v1: i64): + v2 = call_indirect.i64 sig0, v1(v0) + return v2 +} + +; check: stmg %r14, %r15, 112(%r15) +; nextln: aghi %r15, -160 +; nextln: virtual_sp_offset_adjust 160 +; nextln: basr %r14, %r3 +; nextln: lmg %r14, %r15, 272(%r15) +; nextln: br %r14 diff --git a/cranelift/filetests/filetests/isa/s390x/condbr.clif b/cranelift/filetests/filetests/isa/s390x/condbr.clif new file mode 100644 index 0000000000..12b81b705c --- /dev/null +++ b/cranelift/filetests/filetests/isa/s390x/condbr.clif @@ -0,0 +1,62 @@ +test compile +target s390x + +function %f(i64, i64) -> b1 { +block0(v0: i64, v1: i64): + v2 = icmp eq v0, v1 + return v2 +} + +; check: clgr %r2, %r3 +; nextln: lhi %r2, 0 +; nextln: lochie %r2, 1 +; nextln: br %r14 + +function %f(i64, i64) -> i64 { +block0(v0: i64, v1: i64): + v2 = icmp eq v0, v1 + brnz v2, block1 + jump block2 + +block1: + v4 = iconst.i64 1 + return v4 + +block2: + v5 = iconst.i64 2 + return v5 +} + +; check: Block 0: +; check: clgr %r2, %r3 +; nextln: jge label1 ; jg label2 +; check: Block 1: +; check: lghi %r2, 1 +; nextln: br %r14 +; check: Block 2: +; check: lghi %r2, 2 +; nextln: br %r14 + +function %f(i64, i64) -> i64 { +block0(v0: i64, v1: i64): + v2 = icmp eq v0, v1 + brnz v2, block1 + jump block1 + +block1: + v4 = iconst.i64 1 + return v4 +} + +; FIXME: Should optimize away branches + +; check: Block 0: +; 
check: clgr %r2, %r3 +; nextln: jge label1 ; jg label2 +; check: Block 1: +; check: jg label3 +; check: Block 2: +; check: jg label3 +; check: Block 3: +; check: lghi %r2, 1 +; nextln: br %r14 diff --git a/cranelift/filetests/filetests/isa/s390x/condops.clif b/cranelift/filetests/filetests/isa/s390x/condops.clif new file mode 100644 index 0000000000..aaf1c3ba54 --- /dev/null +++ b/cranelift/filetests/filetests/isa/s390x/condops.clif @@ -0,0 +1,43 @@ +test compile +target s390x + +function %f(i8, i64, i64) -> i64 { +block0(v0: i8, v1: i64, v2: i64): + v3 = iconst.i8 42 + v4 = icmp eq v0, v3 + v5 = select.i64 v4, v1, v2 + return v5 +} + +; check: llcr %r2, %r2 +; nextln: clfi %r2, 42 +; nextln: locgre %r4, %r3 +; nextln: lgr %r2, %r4 +; nextln: br %r14 + +function %g(b1, i8, i8) -> i8 { +block0(v0: b1, v1: i8, v2: i8): + v3 = select.i8 v0, v1, v2 + return v3 +} + +; FIXME: optimize i8/i16 compares + +; check: llcr %r2, %r2 +; nextln: chi %r2, 0 +; nextln: locrlh %r4, %r3 +; nextln: lr %r2, %r4 +; nextln: br %r14 + +function %i(i32, i8, i8) -> i8 { +block0(v0: i32, v1: i8, v2: i8): + v3 = iconst.i32 42 + v4 = icmp.i32 eq v0, v3 + v5 = select.i8 v4, v1, v2 + return v5 +} + +; check: clfi %r2, 42 +; nextln: locre %r4, %r3 +; nextln: lr %r2, %r4 +; nextln: br %r14 diff --git a/cranelift/filetests/filetests/isa/s390x/constants.clif b/cranelift/filetests/filetests/isa/s390x/constants.clif new file mode 100644 index 0000000000..96effdecde --- /dev/null +++ b/cranelift/filetests/filetests/isa/s390x/constants.clif @@ -0,0 +1,113 @@ +test compile +target s390x + +function %f() -> b8 { +block0: + v0 = bconst.b8 true + return v0 +} + +; check: lhi %r2, 255 +; nextln: br %r14 + +function %f() -> b16 { +block0: + v0 = bconst.b16 false + return v0 +} + +; check: lhi %r2, 0 +; nextln: br %r14 + +function %f() -> i64 { +block0: + v0 = iconst.i64 0 + return v0 +} + +; check: lghi %r2, 0 +; nextln: br %r14 + +function %f() -> i64 { +block0: + v0 = iconst.i64 0xffff + return v0 +} + +; check: lgfi %r2, 65535 +; nextln: br %r14 + +function %f() -> i64 { +block0: + v0 = iconst.i64 0xffff0000 + return v0 +} + +; check: llilh %r2, 65535 +; nextln: br %r14 + +function %f() -> i64 { +block0: + v0 = iconst.i64 0xffff00000000 + return v0 +} + +; check: llihl %r2, 65535 +; nextln: br %r14 + +function %f() -> i64 { +block0: + v0 = iconst.i64 0xffff000000000000 + return v0 +} + +; check: llihh %r2, 65535 +; nextln: br %r14 + +function %f() -> i64 { +block0: + v0 = iconst.i64 0xffffffffffffffff + return v0 +} + +; check: lghi %r2, -1 +; nextln: br %r14 + +function %f() -> i64 { +block0: + v0 = iconst.i64 0xffffffffffff0000 + return v0 +} + +; check: lgfi %r2, -65536 +; nextln: br %r14 + +function %f() -> i64 { +block0: + v0 = iconst.i64 0xf34bf0a31212003a ; random digits + return v0 +} + +; check: llihf %r2, 4081840291 +; nextln: iilf %r2, 303169594 +; nextln: br %r14 + +function %f() -> i64 { +block0: + v0 = iconst.i64 0x12e900001ef40000 ; random digits with 2 clear half words + return v0 +} + +; check: llihh %r2, 4841 +; nextln: iilh %r2, 7924 +; nextln: br %r14 + +function %f() -> i32 { +block0: + v0 = iconst.i32 -1 + return v0 +} + +; check: lhi %r2, -1 +; nextln: br %r14 + diff --git a/cranelift/filetests/filetests/isa/s390x/conversions.clif b/cranelift/filetests/filetests/isa/s390x/conversions.clif new file mode 100644 index 0000000000..5d57c8881a --- /dev/null +++ b/cranelift/filetests/filetests/isa/s390x/conversions.clif @@ -0,0 +1,748 @@ +test compile +target s390x + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; 
+;; UEXTEND +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +function %uextend_i32_i64(i32) -> i64 { +block0(v0: i32): + v1 = uextend.i64 v0 + return v1 +} + +; check: llgfr %r2, %r2 +; nextln: br %r14 + +function %uextend_i16_i64(i16) -> i64 { +block0(v0: i16): + v1 = uextend.i64 v0 + return v1 +} + +; check: llghr %r2, %r2 +; nextln: br %r14 + +function %uextend_i16_i32(i16) -> i32 { +block0(v0: i16): + v1 = uextend.i32 v0 + return v1 +} + +; check: llhr %r2, %r2 +; nextln: br %r14 + +function %uextend_i8_i64(i8) -> i64 { +block0(v0: i8): + v1 = uextend.i64 v0 + return v1 +} + +; check: llgcr %r2, %r2 +; nextln: br %r14 + +function %uextend_i8_i32(i8) -> i32 { +block0(v0: i8): + v1 = uextend.i32 v0 + return v1 +} + +; check: llcr %r2, %r2 +; nextln: br %r14 + +function %uextend_i8_i16(i8) -> i16 { +block0(v0: i8): + v1 = uextend.i16 v0 + return v1 +} + +; check: llcr %r2, %r2 +; nextln: br %r14 + + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; SEXTEND +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +function %sextend_i32_i64(i32) -> i64 { +block0(v0: i32): + v1 = sextend.i64 v0 + return v1 +} + +; check: lgfr %r2, %r2 +; nextln: br %r14 + +function %sextend_i16_i64(i16) -> i64 { +block0(v0: i16): + v1 = sextend.i64 v0 + return v1 +} + +; check: lghr %r2, %r2 +; nextln: br %r14 + +function %sextend_i16_i32(i16) -> i32 { +block0(v0: i16): + v1 = sextend.i32 v0 + return v1 +} + +; check: lhr %r2, %r2 +; nextln: br %r14 + +function %sextend_i8_i64(i8) -> i64 { +block0(v0: i8): + v1 = sextend.i64 v0 + return v1 +} + +; check: lgbr %r2, %r2 +; nextln: br %r14 + +function %sextend_i8_i32(i8) -> i32 { +block0(v0: i8): + v1 = sextend.i32 v0 + return v1 +} + +; check: lbr %r2, %r2 +; nextln: br %r14 + +function %sextend_i8_i16(i8) -> i16 { +block0(v0: i8): + v1 = sextend.i16 v0 + return v1 +} + +; check: lbr %r2, %r2 +; nextln: br %r14 + + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; IREDUCE +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +function %ireduce_i64_i32(i64, i64) -> i32 { +block0(v0: i64, v1: i64): + v2 = ireduce.i32 v1 + return v2 +} + +; check: lr %r2, %r3 +; nextln: br %r14 + +function %ireduce_i64_i16(i64, i64) -> i16 { +block0(v0: i64, v1: i64): + v2 = ireduce.i16 v1 + return v2 +} + +; check: lr %r2, %r3 +; nextln: br %r14 + +function %ireduce_i64_i8(i64, i64) -> i8 { +block0(v0: i64, v1: i64): + v2 = ireduce.i8 v1 + return v2 +} + +; check: lr %r2, %r3 +; nextln: br %r14 + +function %ireduce_i32_i16(i32, i32) -> i16 { +block0(v0: i32, v1: i32): + v2 = ireduce.i16 v1 + return v2 +} + +; check: lr %r2, %r3 +; nextln: br %r14 + +function %ireduce_i32_i8(i32, i32) -> i8 { +block0(v0: i32, v1: i32): + v2 = ireduce.i8 v1 + return v2 +} + +; check: lr %r2, %r3 +; nextln: br %r14 + +function %ireduce_i16_i8(i16, i16) -> i8 { +block0(v0: i16, v1: i16): + v2 = ireduce.i8 v1 + return v2 +} + +; check: lr %r2, %r3 +; nextln: br %r14 + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; BEXTEND +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +function %bextend_b32_b64(b32) -> b64 { +block0(v0: b32): + v1 = bextend.b64 v0 + return v1 +} + +; check: lgfr %r2, %r2 +; nextln: br %r14 + +function %bextend_b16_b64(b16) -> b64 { +block0(v0: b16): + v1 = bextend.b64 v0 + return v1 +} + +; check: lghr %r2, %r2 +; nextln: br %r14 + +function %bextend_b16_b32(b16) -> b32 { +block0(v0: b16): + v1 = bextend.b32 v0 + return v1 +} + +; check: lhr %r2, %r2 +; nextln: br %r14 + +function %bextend_b8_b64(b8) -> b64 { +block0(v0: b8): + v1 = bextend.b64 v0 + return v1 +} + +; check: lgbr %r2, %r2 +; nextln: br 
%r14 + +function %bextend_b8_b32(b8) -> b32 { +block0(v0: b8): + v1 = bextend.b32 v0 + return v1 +} + +; check: lbr %r2, %r2 +; nextln: br %r14 + +function %bextend_b8_b16(b8) -> b16 { +block0(v0: b8): + v1 = bextend.b16 v0 + return v1 +} + +; check: lbr %r2, %r2 +; nextln: br %r14 + +function %bextend_b1_b64(b1) -> b64 { +block0(v0: b1): + v1 = bextend.b64 v0 + return v1 +} + +; check: sllg %r2, %r2, 63 +; nextln: srag %r2, %r2, 63 +; nextln: br %r14 + +function %bextend_b1_b32(b1) -> b32 { +block0(v0: b1): + v1 = bextend.b32 v0 + return v1 +} + +; check: sllk %r2, %r2, 31 +; nextln: srak %r2, %r2, 31 +; nextln: br %r14 + +function %bextend_b1_b16(b1) -> b16 { +block0(v0: b1): + v1 = bextend.b16 v0 + return v1 +} + +; check: sllk %r2, %r2, 31 +; nextln: srak %r2, %r2, 31 +; nextln: br %r14 + +function %bextend_b1_b8(b1) -> b8 { +block0(v0: b1): + v1 = bextend.b8 v0 + return v1 +} + +; check: sllk %r2, %r2, 31 +; nextln: srak %r2, %r2, 31 +; nextln: br %r14 + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; BREDUCE +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +function %breduce_b64_b32(b64, b64) -> b32 { +block0(v0: b64, v1: b64): + v2 = breduce.b32 v1 + return v2 +} + +; check: lr %r2, %r3 +; nextln: br %r14 + +function %breduce_b64_b16(b64, b64) -> b16 { +block0(v0: b64, v1: b64): + v2 = breduce.b16 v1 + return v2 +} + +; check: lr %r2, %r3 +; nextln: br %r14 + +function %breduce_b64_b8(b64, b64) -> b8 { +block0(v0: b64, v1: b64): + v2 = breduce.b8 v1 + return v2 +} + +; check: lr %r2, %r3 +; nextln: br %r14 + +function %breduce_b64_b1(b64, b64) -> b1 { +block0(v0: b64, v1: b64): + v2 = breduce.b1 v1 + return v2 +} + +; check: lr %r2, %r3 +; nextln: br %r14 + +function %breduce_b32_b16(b32, b32) -> b16 { +block0(v0: b32, v1: b32): + v2 = breduce.b16 v1 + return v2 +} + +; check: lr %r2, %r3 +; nextln: br %r14 + +function %breduce_b32_b8(b32, b32) -> b8 { +block0(v0: b32, v1: b32): + v2 = breduce.b8 v1 + return v2 +} + +; check: lr %r2, %r3 +; nextln: br %r14 + +function %breduce_b32_b1(b32, b32) -> b1 { +block0(v0: b32, v1: b32): + v2 = breduce.b1 v1 + return v2 +} + +; check: lr %r2, %r3 +; nextln: br %r14 + +function %breduce_b16_b8(b16, b16) -> b8 { +block0(v0: b16, v1: b16): + v2 = breduce.b8 v1 + return v2 +} + +; check: lr %r2, %r3 +; nextln: br %r14 + +function %breduce_b16_b1(b16, b16) -> b1 { +block0(v0: b16, v1: b16): + v2 = breduce.b1 v1 + return v2 +} + +; check: lr %r2, %r3 +; nextln: br %r14 + +function %breduce_b8_b1(b8, b8) -> b1 { +block0(v0: b8, v1: b8): + v2 = breduce.b1 v1 + return v2 +} + +; check: lr %r2, %r3 +; nextln: br %r14 + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; BMASK +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +function %bmask_b64_i64(b64, b64) -> i64 { +block0(v0: b64, v1: b64): + v2 = bmask.i64 v1 + return v2 +} + +; check: lgr %r2, %r3 +; nextln: br %r14 + +function %bmask_b64_i32(b64, b64) -> i32 { +block0(v0: b64, v1: b64): + v2 = bmask.i32 v1 + return v2 +} + +; check: lr %r2, %r3 +; nextln: br %r14 + +function %bmask_b64_i16(b64, b64) -> i16 { +block0(v0: b64, v1: b64): + v2 = bmask.i16 v1 + return v2 +} + +; check: lr %r2, %r3 +; nextln: br %r14 + +function %bmask_b64_i8(b64, b64) -> i8 { +block0(v0: b64, v1: b64): + v2 = bmask.i8 v1 + return v2 +} + +; check: lr %r2, %r3 +; nextln: br %r14 + +function %bmask_b32_i64(b32, b32) -> i64 { +block0(v0: b32, v1: b32): + v2 = bmask.i64 v1 + return v2 +} + +; check: lgfr %r2, %r3 +; nextln: br %r14 + +function %bmask_b32_i32(b32, b32) -> i32 { +block0(v0: b32, v1: b32): + v2 = bmask.i32 v1 + 
return v2 +} + +; check: lr %r2, %r3 +; nextln: br %r14 + +function %bmask_b32_i16(b32, b32) -> i16 { +block0(v0: b32, v1: b32): + v2 = bmask.i16 v1 + return v2 +} + +; check: lr %r2, %r3 +; nextln: br %r14 + +function %bmask_b32_i8(b32, b32) -> i8 { +block0(v0: b32, v1: b32): + v2 = bmask.i8 v1 + return v2 +} + +; check: lr %r2, %r3 +; nextln: br %r14 + +function %bmask_b16_i64(b16, b16) -> i64 { +block0(v0: b16, v1: b16): + v2 = bmask.i64 v1 + return v2 +} + +; check: lghr %r2, %r3 +; nextln: br %r14 + +function %bmask_b16_i32(b16, b16) -> i32 { +block0(v0: b16, v1: b16): + v2 = bmask.i32 v1 + return v2 +} + +; check: lhr %r2, %r3 +; nextln: br %r14 + +function %bmask_b16_i16(b16, b16) -> i16 { +block0(v0: b16, v1: b16): + v2 = bmask.i16 v1 + return v2 +} + +; check: lr %r2, %r3 +; nextln: br %r14 + +function %bmask_b16_i8(b16, b16) -> i8 { +block0(v0: b16, v1: b16): + v2 = bmask.i8 v1 + return v2 +} + +; check: lr %r2, %r3 +; nextln: br %r14 + +function %bmask_b8_i64(b8, b8) -> i64 { +block0(v0: b8, v1: b8): + v2 = bmask.i64 v1 + return v2 +} + +; check: lgbr %r2, %r3 +; nextln: br %r14 + +function %bmask_b8_i32(b8, b8) -> i32 { +block0(v0: b8, v1: b8): + v2 = bmask.i32 v1 + return v2 +} + +; check: lbr %r2, %r3 +; nextln: br %r14 + +function %bmask_b8_i16(b8, b8) -> i16 { +block0(v0: b8, v1: b8): + v2 = bmask.i16 v1 + return v2 +} + +; check: lbr %r2, %r3 +; nextln: br %r14 + +function %bmask_b8_i8(b8, b8) -> i8 { +block0(v0: b8, v1: b8): + v2 = bmask.i8 v1 + return v2 +} + +; check: lr %r2, %r3 +; nextln: br %r14 + +function %bmask_b1_i64(b1, b1) -> i64 { +block0(v0: b1, v1: b1): + v2 = bmask.i64 v1 + return v2 +} + +; check: sllg %r2, %r3, 63 +; nextln: srag %r2, %r2, 63 +; nextln: br %r14 + +function %bmask_b1_i32(b1, b1) -> i32 { +block0(v0: b1, v1: b1): + v2 = bmask.i32 v1 + return v2 +} + +; check: sllk %r2, %r3, 31 +; nextln: srak %r2, %r2, 31 +; nextln: br %r14 + +function %bmask_b1_i16(b1, b1) -> i16 { +block0(v0: b1, v1: b1): + v2 = bmask.i16 v1 + return v2 +} + +; check: sllk %r2, %r3, 31 +; nextln: srak %r2, %r2, 31 +; nextln: br %r14 + +function %bmask_b1_i8(b1, b1) -> i8 { +block0(v0: b1, v1: b1): + v2 = bmask.i8 v1 + return v2 +} + +; check: sllk %r2, %r3, 31 +; nextln: srak %r2, %r2, 31 +; nextln: br %r14 + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; BINT +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +function %bint_b64_i64(b64) -> i64 { +block0(v0: b64): + v1 = bint.i64 v0 + return v1 +} + +; check: lghi %r3, 1 +; nextln: ngr %r2, %r3 +; nextln: br %r14 + +function %bint_b64_i32(b64) -> i32 { +block0(v0: b64): + v1 = bint.i32 v0 + return v1 +} + +; check: nilf %r2, 1 +; nextln: br %r14 + +function %bint_b64_i16(b64) -> i16 { +block0(v0: b64): + v1 = bint.i16 v0 + return v1 +} + +; check: nill %r2, 1 +; nextln: br %r14 + +function %bint_b64_i8(b64) -> i8 { +block0(v0: b64): + v1 = bint.i8 v0 + return v1 +} + +; check: nill %r2, 1 +; nextln: br %r14 + +function %bint_b32_i64(b32) -> i64 { +block0(v0: b32): + v1 = bint.i64 v0 + return v1 +} + +; check: lghi %r3, 1 +; nextln: ngr %r2, %r3 +; nextln: br %r14 + +function %bint_b32_i32(b32) -> i32 { +block0(v0: b32): + v1 = bint.i32 v0 + return v1 +} + +; check: nilf %r2, 1 +; nextln: br %r14 + +function %bint_b32_i16(b32) -> i16 { +block0(v0: b32): + v1 = bint.i16 v0 + return v1 +} + +; check: nill %r2, 1 +; nextln: br %r14 + +function %bint_b32_i8(b32) -> i8 { +block0(v0: b32): + v1 = bint.i8 v0 + return v1 +} + +; check: nill %r2, 1 +; nextln: br %r14 + +function %bint_b16_i64(b16) -> i64 { +block0(v0: b16): + v1 = 
bint.i64 v0 + return v1 +} + +; check: lghi %r3, 1 +; nextln: ngr %r2, %r3 +; nextln: br %r14 + +function %bint_b16_i32(b16) -> i32 { +block0(v0: b16): + v1 = bint.i32 v0 + return v1 +} + +; check: nilf %r2, 1 +; nextln: br %r14 + +function %bint_b16_i16(b16) -> i16 { +block0(v0: b16): + v1 = bint.i16 v0 + return v1 +} + +; check: nill %r2, 1 +; nextln: br %r14 + +function %bint_b16_i8(b16) -> i8 { +block0(v0: b16): + v1 = bint.i8 v0 + return v1 +} + +; check: nill %r2, 1 +; nextln: br %r14 + +function %bint_b8_i64(b8) -> i64 { +block0(v0: b8): + v1 = bint.i64 v0 + return v1 +} + +; check: lghi %r3, 1 +; nextln: ngr %r2, %r3 +; nextln: br %r14 + +function %bint_b8_i32(b8) -> i32 { +block0(v0: b8): + v1 = bint.i32 v0 + return v1 +} + +; check: nilf %r2, 1 +; nextln: br %r14 + +function %bint_b8_i16(b8) -> i16 { +block0(v0: b8): + v1 = bint.i16 v0 + return v1 +} + +; check: nill %r2, 1 +; nextln: br %r14 + +function %bint_b8_i8(b8) -> i8 { +block0(v0: b8): + v1 = bint.i8 v0 + return v1 +} + +; check: nill %r2, 1 +; nextln: br %r14 + +function %bint_b1_i64(b1) -> i64 { +block0(v0: b1): + v1 = bint.i64 v0 + return v1 +} + +; check: lghi %r3, 1 +; nextln: ngr %r2, %r3 +; nextln: br %r14 + +function %bint_b1_i32(b1) -> i32 { +block0(v0: b1): + v1 = bint.i32 v0 + return v1 +} + +; check: nilf %r2, 1 +; nextln: br %r14 + +function %bint_b1_i16(b1) -> i16 { +block0(v0: b1): + v1 = bint.i16 v0 + return v1 +} + +; check: nill %r2, 1 +; nextln: br %r14 + +function %bint_b1_i8(b1) -> i8 { +block0(v0: b1): + v1 = bint.i8 v0 + return v1 +} + +; check: nill %r2, 1 +; nextln: br %r14 + diff --git a/cranelift/filetests/filetests/isa/s390x/div-traps.clif b/cranelift/filetests/filetests/isa/s390x/div-traps.clif new file mode 100644 index 0000000000..2d7428b50d --- /dev/null +++ b/cranelift/filetests/filetests/isa/s390x/div-traps.clif @@ -0,0 +1,355 @@ +test compile +set avoid_div_traps=1 +target s390x + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; SDIV +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +function %sdiv_i64(i64, i64) -> i64 { +block0(v0: i64, v1: i64): + v2 = sdiv.i64 v0, v1 + return v2 +} + +; check: lgr %r1, %r2 +; nextln: cgite %r3, 0 +; nextln: llihf %r2, 2147483647 +; nextln: iilf %r2, 4294967295 +; nextln: xgr %r2, %r1 +; nextln: ngr %r2, %r3 +; nextln: cgite %r2, -1 +; nextln: dsgr %r0, %r3 +; nextln: lgr %r2, %r1 +; nextln: br %r14 + +function %sdiv_i64_imm(i64) -> i64 { +block0(v0: i64): + v1 = iconst.i64 2 + v2 = sdiv.i64 v0, v1 + return v2 +} + +; check: lgr %r1, %r2 +; nextln: lghi %r2, 2 +; nextln: dsgr %r0, %r2 +; nextln: lgr %r2, %r1 +; nextln: br %r14 + +function %sdiv_i32(i32, i32) -> i32 { +block0(v0: i32, v1: i32): + v2 = sdiv.i32 v0, v1 + return v2 +} + +; check: lgfr %r1, %r2 +; nextln: cite %r3, 0 +; nextln: iilf %r2, 2147483647 +; nextln: xr %r2, %r1 +; nextln: nr %r2, %r3 +; nextln: cite %r2, -1 +; nextln: dsgfr %r0, %r3 +; nextln: lr %r2, %r1 +; nextln: br %r14 + +function %sdiv_i32_imm(i32) -> i32 { +block0(v0: i32): + v1 = iconst.i32 2 + v2 = sdiv.i32 v0, v1 + return v2 +} + +; check: lgfr %r1, %r2 +; nextln: lhi %r2, 2 +; nextln: dsgfr %r0, %r2 +; nextln: lr %r2, %r1 +; nextln: br %r14 + +function %sdiv_i16(i16, i16) -> i16 { +block0(v0: i16, v1: i16): + v2 = sdiv.i16 v0, v1 + return v2 +} + +; check: lghr %r1, %r2 +; nextln: lhr %r2, %r3 +; nextln: cite %r2, 0 +; nextln: lhi %r3, 32767 +; nextln: xr %r3, %r1 +; nextln: nr %r3, %r2 +; nextln: cite %r3, -1 +; nextln: dsgfr %r0, %r2 +; nextln: lr %r2, %r1 +; nextln: br %r14 + +function %sdiv_i16_imm(i16) -> i16 { 
+block0(v0: i16): + v1 = iconst.i16 2 + v2 = sdiv.i16 v0, v1 + return v2 +} + +; check: lghr %r1, %r2 +; nextln: lhi %r2, 2 +; nextln: dsgfr %r0, %r2 +; nextln: lr %r2, %r1 +; nextln: br %r14 + +function %sdiv_i8(i8, i8) -> i8 { +block0(v0: i8, v1: i8): + v2 = sdiv.i8 v0, v1 + return v2 +} + +; check: lgbr %r1, %r2 +; nextln: lbr %r2, %r3 +; nextln: cite %r2, 0 +; nextln: lhi %r3, 127 +; nextln: xr %r3, %r1 +; nextln: nr %r3, %r2 +; nextln: cite %r3, -1 +; nextln: dsgfr %r0, %r2 +; nextln: lr %r2, %r1 +; nextln: br %r14 + +function %sdiv_i8_imm(i8) -> i8 { +block0(v0: i8): + v1 = iconst.i8 2 + v2 = sdiv.i8 v0, v1 + return v2 +} + +; check: lgbr %r1, %r2 +; nextln: lhi %r2, 2 +; nextln: dsgfr %r0, %r2 +; nextln: lr %r2, %r1 +; nextln: br %r14 + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; UDIV +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +function %udiv_i64(i64, i64) -> i64 { +block0(v0: i64, v1: i64): + v2 = udiv.i64 v0, v1 + return v2 +} + +; check: lghi %r0, 0 +; nextln: lgr %r1, %r2 +; nextln: cgite %r3, 0 +; nextln: dlgr %r0, %r3 +; nextln: lgr %r2, %r1 +; nextln: br %r14 + +function %udiv_i64_imm(i64) -> i64 { +block0(v0: i64): + v1 = iconst.i64 2 + v2 = udiv.i64 v0, v1 + return v2 +} + +; check: lghi %r0, 0 +; nextln: lgr %r1, %r2 +; nextln: lghi %r2, 2 +; nextln: dlgr %r0, %r2 +; nextln: lgr %r2, %r1 +; nextln: br %r14 + +function %udiv_i32(i32, i32) -> i32 { +block0(v0: i32, v1: i32): + v2 = udiv.i32 v0, v1 + return v2 +} + +; check: lhi %r0, 0 +; nextln: lr %r1, %r2 +; nextln: cite %r3, 0 +; nextln: dlr %r0, %r3 +; nextln: lr %r2, %r1 +; nextln: br %r14 + +function %udiv_i32_imm(i32) -> i32 { +block0(v0: i32): + v1 = iconst.i32 2 + v2 = udiv.i32 v0, v1 + return v2 +} + +; check: lhi %r0, 0 +; nextln: lr %r1, %r2 +; nextln: lhi %r2, 2 +; nextln: dlr %r0, %r2 +; nextln: lr %r2, %r1 +; nextln: br %r14 + +function %udiv_i16(i16, i16) -> i16 { +block0(v0: i16, v1: i16): + v2 = udiv.i16 v0, v1 + return v2 +} + +; check: lhi %r0, 0 +; nextln: llhr %r1, %r2 +; nextln: llhr %r2, %r3 +; nextln: cite %r2, 0 +; nextln: dlr %r0, %r2 +; nextln: lr %r2, %r1 +; nextln: br %r14 + +function %udiv_i16_imm(i16) -> i16 { +block0(v0: i16): + v1 = iconst.i16 2 + v2 = udiv.i16 v0, v1 + return v2 +} + +; check: lhi %r0, 0 +; nextln: llhr %r1, %r2 +; nextln: lhi %r2, 2 +; nextln: dlr %r0, %r2 +; nextln: lr %r2, %r1 +; nextln: br %r14 + +function %udiv_i8(i8, i8) -> i8 { +block0(v0: i8, v1: i8): + v2 = udiv.i8 v0, v1 + return v2 +} + +; check: lhi %r0, 0 +; nextln: llcr %r1, %r2 +; nextln: llcr %r2, %r3 +; nextln: cite %r2, 0 +; nextln: dlr %r0, %r2 +; nextln: lr %r2, %r1 +; nextln: br %r14 + +function %udiv_i8_imm(i8) -> i8 { +block0(v0: i8): + v1 = iconst.i8 2 + v2 = udiv.i8 v0, v1 + return v2 +} + +; check: lhi %r0, 0 +; nextln: llcr %r1, %r2 +; nextln: lhi %r2, 2 +; nextln: dlr %r0, %r2 +; nextln: lr %r2, %r1 +; nextln: br %r14 + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; SREM +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +function %srem_i64(i64, i64) -> i64 { +block0(v0: i64, v1: i64): + v2 = srem.i64 v0, v1 + return v2 +} + +; check: lgr %r1, %r2 +; nextln: cgite %r3, 0 +; nextln: cghi %r3, -1 +; nextln: locghie %r1, 0 +; nextln: dsgr %r0, %r3 +; nextln: lgr %r2, %r0 +; nextln: br %r14 + +function %srem_i32(i32, i32) -> i32 { +block0(v0: i32, v1: i32): + v2 = srem.i32 v0, v1 + return v2 +} + +; check: lgfr %r1, %r2 +; nextln: cite %r3, 0 +; nextln: dsgfr %r0, %r3 +; nextln: lr %r2, %r0 +; nextln: br %r14 + +function %srem_i16(i16, i16) -> i16 { +block0(v0: i16, v1: i16): + v2 = srem.i16 
v0, v1 + return v2 +} + +; check: lghr %r1, %r2 +; nextln: lhr %r2, %r3 +; nextln: cite %r2, 0 +; nextln: dsgfr %r0, %r2 +; nextln: lr %r2, %r0 +; nextln: br %r14 + +function %srem_i8(i8, i8) -> i8 { +block0(v0: i8, v1: i8): + v2 = srem.i8 v0, v1 + return v2 +} + +; check: lgbr %r1, %r2 +; nextln: lbr %r2, %r3 +; nextln: cite %r2, 0 +; nextln: dsgfr %r0, %r2 +; nextln: lr %r2, %r0 +; nextln: br %r14 + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; UREM +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +function %urem_i64(i64, i64) -> i64 { +block0(v0: i64, v1: i64): + v2 = urem.i64 v0, v1 + return v2 +} + +; check: lghi %r0, 0 +; nextln: lgr %r1, %r2 +; nextln: cgite %r3, 0 +; nextln: dlgr %r0, %r3 +; nextln: lgr %r2, %r0 +; nextln: br %r14 + +function %urem_i32(i32, i32) -> i32 { +block0(v0: i32, v1: i32): + v2 = urem.i32 v0, v1 + return v2 +} + +; check: lhi %r0, 0 +; nextln: lr %r1, %r2 +; nextln: cite %r3, 0 +; nextln: dlr %r0, %r3 +; nextln: lr %r2, %r0 +; nextln: br %r14 + +function %urem_i16(i16, i16) -> i16 { +block0(v0: i16, v1: i16): + v2 = urem.i16 v0, v1 + return v2 +} + +; check: lhi %r0, 0 +; nextln: llhr %r1, %r2 +; nextln: llhr %r2, %r3 +; nextln: cite %r2, 0 +; nextln: dlr %r0, %r2 +; nextln: lr %r2, %r0 +; nextln: br %r14 + +function %urem_i8(i8, i8) -> i8 { +block0(v0: i8, v1: i8): + v2 = urem.i8 v0, v1 + return v2 +} + +; check: lhi %r0, 0 +; nextln: llcr %r1, %r2 +; nextln: llcr %r2, %r3 +; nextln: cite %r2, 0 +; nextln: dlr %r0, %r2 +; nextln: lr %r2, %r0 +; nextln: br %r14 + diff --git a/cranelift/filetests/filetests/isa/s390x/floating-point.clif b/cranelift/filetests/filetests/isa/s390x/floating-point.clif new file mode 100644 index 0000000000..6f95c82487 --- /dev/null +++ b/cranelift/filetests/filetests/isa/s390x/floating-point.clif @@ -0,0 +1,711 @@ +test compile +target s390x + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; F32CONST/F64CONST +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +; FIXME: should use FZERO instruction +; FIXME: should use out-of-line literal pool + +function %f32const_zero() -> f32 { +block0: + v1 = f32const 0x0.0 + return v1 +} + +; check: bras %r1, 8 ; data.f32 0 ; le %f0, 0(%r1) +; nextln: br %r14 + +function %f64const_zero() -> f64 { +block0: + v1 = f64const 0x0.0 + return v1 +} + +; check: bras %r1, 12 ; data.f64 0 ; ld %f0, 0(%r1) +; nextln: br %r14 + +function %f32const_one() -> f32 { +block0: + v1 = f32const 0x1.0 + return v1 +} + +; check: bras %r1, 8 ; data.f32 1 ; le %f0, 0(%r1) +; nextln: br %r14 + +function %f64const_one() -> f64 { +block0: + v1 = f64const 0x1.0 + return v1 +} + +; check: bras %r1, 12 ; data.f64 1 ; ld %f0, 0(%r1) +; nextln: br %r14 + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; FADD +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +function %fadd_f32(f32, f32) -> f32 { +block0(v0: f32, v1: f32): + v2 = fadd v0, v1 + return v2 +} + +; check: aebr %f0, %f2 +; nextln: br %r14 + +function %fadd_f64(f64, f64) -> f64 { +block0(v0: f64, v1: f64): + v2 = fadd v0, v1 + return v2 +} + +; check: adbr %f0, %f2 +; nextln: br %r14 + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; FSUB +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +function %fsub_f32(f32, f32) -> f32 { +block0(v0: f32, v1: f32): + v2 = fsub v0, v1 + return v2 +} + +; check: sebr %f0, %f2 +; nextln: br %r14 + +function %fsub_f64(f64, f64) -> f64 { +block0(v0: f64, v1: f64): + v2 = fsub v0, v1 + return v2 +} + +; check: sdbr %f0, %f2 +; nextln: br %r14 + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; FMUL +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + 
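+; Like FADD/FSUB above, multiplies map one-to-one onto the BFP
+; instructions: meebr for f32 and mdbr for f64 (the "e"/"d" suffix
+; convention carries through the rest of this file).
+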
+function %fmul_f32(f32, f32) -> f32 { +block0(v0: f32, v1: f32): + v2 = fmul v0, v1 + return v2 +} + +; check: meebr %f0, %f2 +; nextln: br %r14 + +function %fmul_f64(f64, f64) -> f64 { +block0(v0: f64, v1: f64): + v2 = fmul v0, v1 + return v2 +} + +; check: mdbr %f0, %f2 +; nextln: br %r14 + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; FDIV +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +function %fdiv_f32(f32, f32) -> f32 { +block0(v0: f32, v1: f32): + v2 = fdiv v0, v1 + return v2 +} + +; check: debr %f0, %f2 +; nextln: br %r14 + +function %fdiv_f64(f64, f64) -> f64 { +block0(v0: f64, v1: f64): + v2 = fdiv v0, v1 + return v2 +} + +; check: ddbr %f0, %f2 +; nextln: br %r14 + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; FMIN +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +function %fmin_f32(f32, f32) -> f32 { +block0(v0: f32, v1: f32): + v2 = fmin v0, v1 + return v2 +} + +; check: wfminsb %f0, %f0, %f2, 1 +; nextln: br %r14 + +function %fmin_f64(f64, f64) -> f64 { +block0(v0: f64, v1: f64): + v2 = fmin v0, v1 + return v2 +} + +; check: wfmindb %f0, %f0, %f2, 1 +; nextln: br %r14 + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; FMAX +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +function %fmax_f32(f32, f32) -> f32 { +block0(v0: f32, v1: f32): + v2 = fmax v0, v1 + return v2 +} + +; check: wfmaxsb %f0, %f0, %f2, 1 +; nextln: br %r14 + +function %fmax_f64(f64, f64) -> f64 { +block0(v0: f64, v1: f64): + v2 = fmax v0, v1 + return v2 +} + +; check: wfmaxdb %f0, %f0, %f2, 1 +; nextln: br %r14 + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; SQRT +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +function %sqrt_f32(f32) -> f32 { +block0(v0: f32): + v1 = sqrt v0 + return v1 +} + +; check: sqebr %f0, %f0 +; nextln: br %r14 + +function %sqrt_f64(f64) -> f64 { +block0(v0: f64): + v1 = sqrt v0 + return v1 +} + +; check: sqdbr %f0, %f0 +; nextln: br %r14 + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; FABS +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +function %fabs_f32(f32) -> f32 { +block0(v0: f32): + v1 = fabs v0 + return v1 +} + +; check: lpebr %f0, %f0 +; nextln: br %r14 + +function %fabs_f64(f64) -> f64 { +block0(v0: f64): + v1 = fabs v0 + return v1 +} + +; check: lpdbr %f0, %f0 +; nextln: br %r14 + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; FNEG +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +function %fneg_f32(f32) -> f32 { +block0(v0: f32): + v1 = fneg v0 + return v1 +} + +; check: lcebr %f0, %f0 +; nextln: br %r14 + +function %fneg_f64(f64) -> f64 { +block0(v0: f64): + v1 = fneg v0 + return v1 +} + +; check: lcdbr %f0, %f0 +; nextln: br %r14 + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; FPROMOTE/FDEMOTE +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +function %fpromote_f32(f32) -> f64 { +block0(v0: f32): + v1 = fpromote.f64 v0 + return v1 +} + +; check: ldebr %f0, %f0 +; nextln: br %r14 + +function %fdemote_f64(f64) -> f32 { +block0(v0: f64): + v1 = fdemote.f32 v0 + return v1 +} + +; check: ledbr %f0, %f0 +; nextln: br %r14 + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; CEIL +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +function %ceil_f32(f32) -> f32 { +block0(v0: f32): + v1 = ceil v0 + return v1 +} + +; check: fiebr %f0, %f0, 6 +; nextln: br %r14 + +function %ceil_f64(f64) -> f64 { +block0(v0: f64): + v1 = ceil v0 + return v1 +} + +; check: fidbr %f0, %f0, 6 +; nextln: br %r14 + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; FLOOR +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +function %floor_f32(f32) -> f32 { +block0(v0: f32): + v1 = floor v0 + return v1 +} 
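+ +; Note: the fiebr/fidbr mask value in these round-to-integer tests selects the rounding mode: 6 = toward +infinity (ceil), 7 = toward -infinity (floor), 5 = toward zero (trunc), 4 = to nearest with ties to even (nearest).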
+ +; check: fiebr %f0, %f0, 7 +; nextln: br %r14 + +function %floor_f64(f64) -> f64 { +block0(v0: f64): + v1 = floor v0 + return v1 +} + +; check: fidbr %f0, %f0, 7 +; nextln: br %r14 + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; TRUNC +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +function %trunc_f32(f32) -> f32 { +block0(v0: f32): + v1 = trunc v0 + return v1 +} + +; check: fiebr %f0, %f0, 5 +; nextln: br %r14 + +function %trunc_f64(f64) -> f64 { +block0(v0: f64): + v1 = trunc v0 + return v1 +} + +; check: fidbr %f0, %f0, 5 +; nextln: br %r14 + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; NEAREST +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +function %nearest_f32(f32) -> f32 { +block0(v0: f32): + v1 = nearest v0 + return v1 +} + +; check: fiebr %f0, %f0, 4 +; nextln: br %r14 + +function %nearest_f64(f64) -> f64 { +block0(v0: f64): + v1 = nearest v0 + return v1 +} + +; check: fidbr %f0, %f0, 4 +; nextln: br %r14 + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; FMA +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +function %fma_f32(f32, f32, f32) -> f32 { +block0(v0: f32, v1: f32, v2: f32): + v3 = fma v0, v1, v2 + return v3 +} + +; FIXME: regalloc + +; check: maebr %f4, %f0, %f2 +; nextln: ler %f0, %f4 +; nextln: br %r14 + +function %fma_f64(f64, f64, f64) -> f64 { +block0(v0: f64, v1: f64, v2: f64): + v3 = fma v0, v1, v2 + return v3 +} + +; check: madbr %f4, %f0, %f2 +; nextln: ldr %f0, %f4 +; nextln: br %r14 + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; FCOPYSIGN +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +function %fcopysign_f32(f32, f32) -> f32 { +block0(v0: f32, v1: f32): + v2 = fcopysign v0, v1 + return v2 +} + +; check: cpsdr %f0, %f2, %f0 +; nextln: br %r14 + +function %fcopysign_f64(f64, f64) -> f64 { +block0(v0: f64, v1: f64): + v2 = fcopysign v0, v1 + return v2 +} + +; check: cpsdr %f0, %f2, %f0 +; nextln: br %r14 + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; FCVT_TO_UINT/FCVT_TO_SINT +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +function %fcvt_to_uint_f32_i32(f32) -> i32 { +block0(v0: f32): + v1 = fcvt_to_uint.i32 v0 + return v1 +} + +; check: cebr %f0, %f0 +; nextln: jno 6 ; trap +; nextln: clfebr %r2, 5, %f0, 0 +; nextln: jno 6 ; trap +; nextln: br %r14 + +function %fcvt_to_sint_f32_i32(f32) -> i32 { +block0(v0: f32): + v1 = fcvt_to_sint.i32 v0 + return v1 +} + +; check: cebr %f0, %f0 +; nextln: jno 6 ; trap +; nextln: cfebra %r2, 5, %f0, 0 +; nextln: jno 6 ; trap +; nextln: br %r14 + +function %fcvt_to_uint_f32_i64(f32) -> i64 { +block0(v0: f32): + v1 = fcvt_to_uint.i64 v0 + return v1 +} + +; check: cebr %f0, %f0 +; nextln: jno 6 ; trap +; nextln: clgebr %r2, 5, %f0, 0 +; nextln: jno 6 ; trap +; nextln: br %r14 + +function %fcvt_to_sint_f32_i64(f32) -> i64 { +block0(v0: f32): + v1 = fcvt_to_sint.i64 v0 + return v1 +} + +; check: cebr %f0, %f0 +; nextln: jno 6 ; trap +; nextln: cgebra %r2, 5, %f0, 0 +; nextln: jno 6 ; trap +; nextln: br %r14 + +function %fcvt_to_uint_f64_i32(f64) -> i32 { +block0(v0: f64): + v1 = fcvt_to_uint.i32 v0 + return v1 +} + +; check: cdbr %f0, %f0 +; nextln: jno 6 ; trap +; nextln: clfdbr %r2, 5, %f0, 0 +; nextln: jno 6 ; trap +; nextln: br %r14 + +function %fcvt_to_sint_f64_i32(f64) -> i32 { +block0(v0: f64): + v1 = fcvt_to_sint.i32 v0 + return v1 +} + +; check: cdbr %f0, %f0 +; nextln: jno 6 ; trap +; nextln: cfdbra %r2, 5, %f0, 0 +; nextln: jno 6 ; trap +; nextln: br %r14 + +function %fcvt_to_uint_f64_i64(f64) -> i64 { +block0(v0: f64): + v1 = fcvt_to_uint.i64 v0 + return v1 +} + +; check: cdbr %f0, %f0 +; nextln: jno 6 
; trap +; nextln: clgdbr %r2, 5, %f0, 0 +; nextln: jno 6 ; trap +; nextln: br %r14 + +function %fcvt_to_sint_f64_i64(f64) -> i64 { +block0(v0: f64): + v1 = fcvt_to_sint.i64 v0 + return v1 +} + +; check: cdbr %f0, %f0 +; nextln: jno 6 ; trap +; nextln: cgdbra %r2, 5, %f0, 0 +; nextln: jno 6 ; trap +; nextln: br %r14 + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; FCVT_FROM_UINT/FCVT_FROM_SINT +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +function %fcvt_from_uint_i32_f32(i32) -> f32 { +block0(v0: i32): + v1 = fcvt_from_uint.f32 v0 + return v1 +} + +; check: celfbr %f0, 0, %r2, 0 +; nextln: br %r14 + +function %fcvt_from_sint_i32_f32(i32) -> f32 { +block0(v0: i32): + v1 = fcvt_from_sint.f32 v0 + return v1 +} + +; check: cefbra %f0, 0, %r2, 0 +; nextln: br %r14 + +function %fcvt_from_uint_i64_f32(i64) -> f32 { +block0(v0: i64): + v1 = fcvt_from_uint.f32 v0 + return v1 +} + +; check: celgbr %f0, 0, %r2, 0 +; nextln: br %r14 + +function %fcvt_from_sint_i64_f32(i64) -> f32 { +block0(v0: i64): + v1 = fcvt_from_sint.f32 v0 + return v1 +} + +; check: cegbra %f0, 0, %r2, 0 +; nextln: br %r14 + +function %fcvt_from_uint_i32_f64(i32) -> f64 { +block0(v0: i32): + v1 = fcvt_from_uint.f64 v0 + return v1 +} + +; check: cdlfbr %f0, 0, %r2, 0 +; nextln: br %r14 + +function %fcvt_from_sint_i32_f64(i32) -> f64 { +block0(v0: i32): + v1 = fcvt_from_sint.f64 v0 + return v1 +} + +; check: cdfbra %f0, 0, %r2, 0 +; nextln: br %r14 + +function %fcvt_from_uint_i64_f64(i64) -> f64 { +block0(v0: i64): + v1 = fcvt_from_uint.f64 v0 + return v1 +} + +; check: cdlgbr %f0, 0, %r2, 0 +; nextln: br %r14 + +function %fcvt_from_sint_i64_f64(i64) -> f64 { +block0(v0: i64): + v1 = fcvt_from_sint.f64 v0 + return v1 +} + +; check: cdgbra %f0, 0, %r2, 0 +; nextln: br %r14 + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; FCVT_TO_UINT_SAT/FCVT_TO_SINT_SAT +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +function %fcvt_to_uint_sat_f32_i32(f32) -> i32 { +block0(v0: f32): + v1 = fcvt_to_uint_sat.i32 v0 + return v1 +} + +; check: clfebr %r2, 5, %f0, 0 +; nextln: cebr %f0, %f0 +; nextln: lochio %r2, 0 +; nextln: br %r14 + +function %fcvt_to_sint_sat_f32_i32(f32) -> i32 { +block0(v0: f32): + v1 = fcvt_to_sint_sat.i32 v0 + return v1 +} + +; check: cfebra %r2, 5, %f0, 0 +; nextln: cebr %f0, %f0 +; nextln: lochio %r2, 0 +; nextln: br %r14 + +function %fcvt_to_uint_sat_f32_i64(f32) -> i64 { +block0(v0: f32): + v1 = fcvt_to_uint_sat.i64 v0 + return v1 +} + +; check: clgebr %r2, 5, %f0, 0 +; nextln: cebr %f0, %f0 +; nextln: locghio %r2, 0 +; nextln: br %r14 + +function %fcvt_to_sint_sat_f32_i64(f32) -> i64 { +block0(v0: f32): + v1 = fcvt_to_sint_sat.i64 v0 + return v1 +} + +; check: cgebra %r2, 5, %f0, 0 +; nextln: cebr %f0, %f0 +; nextln: locghio %r2, 0 +; nextln: br %r14 + +function %fcvt_to_uint_sat_f64_i32(f64) -> i32 { +block0(v0: f64): + v1 = fcvt_to_uint_sat.i32 v0 + return v1 +} + +; check: clfdbr %r2, 5, %f0, 0 +; nextln: cdbr %f0, %f0 +; nextln: lochio %r2, 0 +; nextln: br %r14 + +function %fcvt_to_sint_sat_f64_i32(f64) -> i32 { +block0(v0: f64): + v1 = fcvt_to_sint_sat.i32 v0 + return v1 +} + +; check: cfdbra %r2, 5, %f0, 0 +; nextln: cdbr %f0, %f0 +; nextln: lochio %r2, 0 +; nextln: br %r14 + +function %fcvt_to_uint_sat_f64_i64(f64) -> i64 { +block0(v0: f64): + v1 = fcvt_to_uint_sat.i64 v0 + return v1 +} + +; check: clgdbr %r2, 5, %f0, 0 +; nextln: cdbr %f0, %f0 +; nextln: locghio %r2, 0 +; nextln: br %r14 + +function %fcvt_to_sint_sat_f64_i64(f64) -> i64 { +block0(v0: f64): + v1 = fcvt_to_sint_sat.i64 v0 + return v1 +} + +; 
check: cgdbra %r2, 5, %f0, 0 +; nextln: cdbr %f0, %f0 +; nextln: locghio %r2, 0 +; nextln: br %r14 + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; BITCAST +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +function %bitcast_i64_f64(i64) -> f64 { +block0(v0: i64): + v1 = bitcast.f64 v0 + return v1 +} + +; check: ldgr %f0, %r2 +; nextln: br %r14 + +function %bitcast_f64_i64(f64) -> i64 { +block0(v0: f64): + v1 = bitcast.i64 v0 + return v1 +} + +; check: lgdr %r2, %f0 +; nextln: br %r14 + +function %bitcast_i32_f32(i32) -> f32 { +block0(v0: i32): + v1 = bitcast.f32 v0 + return v1 +} + +; check: sllg %r2, %r2, 32 +; nextln: ldgr %f0, %r2 +; nextln: br %r14 + +function %bitcast_f32_i32(f32) -> i32 { +block0(v0: f32): + v1 = bitcast.i32 v0 + return v1 +} + +; check: lgdr %r2, %f0 +; nextln: srlg %r2, %r2, 32 +; nextln: br %r14 diff --git a/cranelift/filetests/filetests/isa/s390x/heap_addr.clif b/cranelift/filetests/filetests/isa/s390x/heap_addr.clif new file mode 100644 index 0000000000..659ba71ae6 --- /dev/null +++ b/cranelift/filetests/filetests/isa/s390x/heap_addr.clif @@ -0,0 +1,49 @@ +test compile +target s390x + +function %dynamic_heap_check(i64 vmctx, i32) -> i64 { + gv0 = vmctx + gv1 = load.i32 notrap aligned gv0 + heap0 = dynamic gv0, bound gv1, offset_guard 0x1000, index_type i32 + +block0(v0: i64, v1: i32): + v2 = heap_addr.i64 heap0, v1, 0 + return v2 +} + +; check: Block 0: +; check: l %r4, 0(%r2) +; nextln: ahi %r4, 0 +; nextln: clr %r3, %r4 +; nextln: jgnh label1 ; jg label2 +; check: Block 1: +; check: llgfr %r5, %r3 +; nextln: agr %r2, %r5 +; nextln: lghi %r5, 0 +; nextln: clr %r3, %r4 +; nextln: locgrh %r2, %r5 +; nextln: br %r14 +; check: Block 2: +; check: trap + +function %static_heap_check(i64 vmctx, i32) -> i64 { + gv0 = vmctx + heap0 = static gv0, bound 0x1_0000, offset_guard 0x1000, index_type i32 + +block0(v0: i64, v1: i32): + v2 = heap_addr.i64 heap0, v1, 0 + return v2 +} + +; check: Block 0: +; check: clfi %r3, 65536 +; nextln: jgnh label1 ; jg label2 +; check: Block 1: +; check: llgfr %r4, %r3 +; nextln: agr %r2, %r4 +; nextln: lghi %r4, 0 +; nextln: clfi %r3, 65536 +; nextln: locgrh %r2, %r4 +; nextln: br %r14 +; check: Block 2: +; check: trap diff --git a/cranelift/filetests/filetests/isa/s390x/icmp.clif b/cranelift/filetests/filetests/isa/s390x/icmp.clif new file mode 100644 index 0000000000..ef792f34cf --- /dev/null +++ b/cranelift/filetests/filetests/isa/s390x/icmp.clif @@ -0,0 +1,604 @@ +test compile +target s390x + +function %icmp_slt_i64(i64, i64) -> b1 { +block0(v0: i64, v1: i64): + v2 = icmp.i64 slt v0, v1 + return v2 +} + +; check: cgr %r2, %r3 +; nextln: lhi %r2, 0 +; nextln: lochil %r2, 1 +; nextln: br %r14 + +function %icmp_slt_i64_ext32(i64, i32) -> b1 { +block0(v0: i64, v1: i32): + v2 = sextend.i64 v1 + v3 = icmp.i64 slt v0, v2 + return v3 +} + +; check: cgfr %r2, %r3 +; nextln: lhi %r2, 0 +; nextln: lochil %r2, 1 +; nextln: br %r14 + +function %icmp_slt_i64_imm16(i64) -> b1 { +block0(v0: i64): + v1 = iconst.i64 1 + v2 = icmp.i64 slt v0, v1 + return v2 +} + +; check: cghi %r2, 1 +; nextln: lhi %r2, 0 +; nextln: lochil %r2, 1 +; nextln: br %r14 + +function %icmp_slt_i64_imm32(i64) -> b1 { +block0(v0: i64): + v1 = iconst.i64 32768 + v2 = icmp.i64 slt v0, v1 + return v2 +} + +; check: cgfi %r2, 32768 +; nextln: lhi %r2, 0 +; nextln: lochil %r2, 1 +; nextln: br %r14 + +function %icmp_slt_i64_mem(i64, i64) -> b1 { +block0(v0: i64, v1: i64): + v2 = load.i64 v1 + v3 = icmp.i64 slt v0, v2 + return v3 +} + +; check: cg %r2, 0(%r3) +; nextln: lhi %r2, 0 +; 
nextln: lochil %r2, 1 +; nextln: br %r14 + +function %icmp_slt_i64_sym(i64) -> b1 { + gv0 = symbol colocated %sym +block0(v0: i64): + v1 = symbol_value.i64 gv0 + v2 = load.i64 v1 + v3 = icmp.i64 slt v0, v2 + return v3 +} + +; check: cgrl %r2, %sym + 0 +; nextln: lhi %r2, 0 +; nextln: lochil %r2, 1 +; nextln: br %r14 + +function %icmp_slt_i64_mem_ext16(i64, i64) -> b1 { +block0(v0: i64, v1: i64): + v2 = sload16.i64 v1 + v3 = icmp.i64 slt v0, v2 + return v3 +} + +; check: cgh %r2, 0(%r3) +; nextln: lhi %r2, 0 +; nextln: lochil %r2, 1 +; nextln: br %r14 + +function %icmp_slt_i64_sym_ext16(i64) -> b1 { + gv0 = symbol colocated %sym +block0(v0: i64): + v1 = symbol_value.i64 gv0 + v2 = sload16.i64 v1 + v3 = icmp.i64 slt v0, v2 + return v3 +} + +; check: cghrl %r2, %sym + 0 +; nextln: lhi %r2, 0 +; nextln: lochil %r2, 1 +; nextln: br %r14 + +function %icmp_slt_i64_mem_ext32(i64, i64) -> b1 { +block0(v0: i64, v1: i64): + v2 = sload32.i64 v1 + v3 = icmp.i64 slt v0, v2 + return v3 +} + +; check: cgf %r2, 0(%r3) +; nextln: lhi %r2, 0 +; nextln: lochil %r2, 1 +; nextln: br %r14 + +function %icmp_slt_i64_sym_ext32(i64) -> b1 { + gv0 = symbol colocated %sym +block0(v0: i64): + v1 = symbol_value.i64 gv0 + v2 = sload32.i64 v1 + v3 = icmp.i64 slt v0, v2 + return v3 +} + +; check: cgfrl %r2, %sym + 0 +; nextln: lhi %r2, 0 +; nextln: lochil %r2, 1 +; nextln: br %r14 + +function %icmp_slt_i32(i32, i32) -> b1 { +block0(v0: i32, v1: i32): + v2 = icmp.i32 slt v0, v1 + return v2 +} + +; check: cr %r2, %r3 +; nextln: lhi %r2, 0 +; nextln: lochil %r2, 1 +; nextln: br %r14 + +function %icmp_slt_i32_imm16(i32) -> b1 { +block0(v0: i32): + v1 = iconst.i32 1 + v2 = icmp.i32 slt v0, v1 + return v2 +} + +; check: chi %r2, 1 +; nextln: lhi %r2, 0 +; nextln: lochil %r2, 1 +; nextln: br %r14 + +function %icmp_slt_i32_imm(i32) -> b1 { +block0(v0: i32): + v1 = iconst.i32 32768 + v2 = icmp.i32 slt v0, v1 + return v2 +} + +; check: cfi %r2, 32768 +; nextln: lhi %r2, 0 +; nextln: lochil %r2, 1 +; nextln: br %r14 + +function %icmp_slt_i32_mem(i32, i64) -> b1 { +block0(v0: i32, v1: i64): + v2 = load.i32 v1 + v3 = icmp.i32 slt v0, v2 + return v3 +} + +; check: c %r2, 0(%r3) +; nextln: lhi %r2, 0 +; nextln: lochil %r2, 1 +; nextln: br %r14 + +function %icmp_slt_i32_memoff(i32, i64) -> b1 { +block0(v0: i32, v1: i64): + v2 = load.i32 v1+4096 + v3 = icmp.i32 slt v0, v2 + return v3 +} + +; check: cy %r2, 4096(%r3) +; nextln: lhi %r2, 0 +; nextln: lochil %r2, 1 +; nextln: br %r14 + +function %icmp_slt_i32_sym(i32) -> b1 { + gv0 = symbol colocated %sym +block0(v0: i32): + v1 = symbol_value.i64 gv0 + v2 = load.i32 v1 + v3 = icmp.i32 slt v0, v2 + return v3 +} + +; check: crl %r2, %sym + 0 +; nextln: lhi %r2, 0 +; nextln: lochil %r2, 1 +; nextln: br %r14 + +function %icmp_slt_i32_mem_ext16(i32, i64) -> b1 { +block0(v0: i32, v1: i64): + v2 = sload16.i32 v1 + v3 = icmp.i32 slt v0, v2 + return v3 +} + +; check: ch %r2, 0(%r3) +; nextln: lhi %r2, 0 +; nextln: lochil %r2, 1 +; nextln: br %r14 + +function %icmp_slt_i32_memoff_ext16(i32, i64) -> b1 { +block0(v0: i32, v1: i64): + v2 = sload16.i32 v1+4096 + v3 = icmp.i32 slt v0, v2 + return v3 +} + +; check: chy %r2, 4096(%r3) +; nextln: lhi %r2, 0 +; nextln: lochil %r2, 1 +; nextln: br %r14 + +function %icmp_slt_i32_sym_ext16(i32) -> b1 { + gv0 = symbol colocated %sym +block0(v0: i32): + v1 = symbol_value.i64 gv0 + v2 = sload16.i32 v1 + v3 = icmp.i32 slt v0, v2 + return v3 +} + +; check: chrl %r2, %sym + 0 +; nextln: lhi %r2, 0 +; nextln: lochil %r2, 1 +; nextln: br %r14 + +function 
%icmp_slt_i16(i16, i16) -> b1 { +block0(v0: i16, v1: i16): + v2 = icmp.i16 slt v0, v1 + return v2 +} + +; check: lhr %r2, %r2 +; nextln: lhr %r3, %r3 +; nextln: cr %r2, %r3 +; nextln: lhi %r2, 0 +; nextln: lochil %r2, 1 +; nextln: br %r14 + +function %icmp_slt_i16_imm(i16) -> b1 { +block0(v0: i16): + v1 = iconst.i16 1 + v2 = icmp.i16 slt v0, v1 + return v2 +} + +; check: lhr %r2, %r2 +; nextln: chi %r2, 1 +; nextln: lhi %r2, 0 +; nextln: lochil %r2, 1 +; nextln: br %r14 + +function %icmp_slt_i16_mem(i16, i64) -> b1 { +block0(v0: i16, v1: i64): + v2 = load.i16 v1 + v3 = icmp.i16 slt v0, v2 + return v3 +} + +; check: lhr %r2, %r2 +; nextln: ch %r2, 0(%r3) +; nextln: lhi %r2, 0 +; nextln: lochil %r2, 1 +; nextln: br %r14 + +function %icmp_slt_i16_sym(i16) -> b1 { + gv0 = symbol colocated %sym +block0(v0: i16): + v1 = symbol_value.i64 gv0 + v2 = load.i16 v1 + v3 = icmp.i16 slt v0, v2 + return v3 +} + +; check: lhr %r2, %r2 +; nextln: chrl %r2, %sym + 0 +; nextln: lhi %r2, 0 +; nextln: lochil %r2, 1 +; nextln: br %r14 + +function %icmp_slt_i8(i8, i8) -> b1 { +block0(v0: i8, v1: i8): + v2 = icmp.i8 slt v0, v1 + return v2 +} + +; check: lbr %r2, %r2 +; nextln: lbr %r3, %r3 +; nextln: cr %r2, %r3 +; nextln: lhi %r2, 0 +; nextln: lochil %r2, 1 +; nextln: br %r14 + +function %icmp_slt_i8_imm(i8) -> b1 { +block0(v0: i8): + v1 = iconst.i8 1 + v2 = icmp.i8 slt v0, v1 + return v2 +} + +; check: lbr %r2, %r2 +; nextln: chi %r2, 1 +; nextln: lhi %r2, 0 +; nextln: lochil %r2, 1 +; nextln: br %r14 + +function %icmp_slt_i8_mem(i8, i64) -> b1 { +block0(v0: i8, v1: i64): + v2 = load.i8 v1 + v3 = icmp.i8 slt v0, v2 + return v3 +} + +; check: lbr %r2, %r2 +; nextln: lb %r3, 0(%r3) +; nextln: cr %r2, %r3 +; nextln: lhi %r2, 0 +; nextln: lochil %r2, 1 +; nextln: br %r14 + +function %icmp_ult_i64(i64, i64) -> b1 { +block0(v0: i64, v1: i64): + v2 = icmp.i64 ult v0, v1 + return v2 +} + +; check: clgr %r2, %r3 +; nextln: lhi %r2, 0 +; nextln: lochil %r2, 1 +; nextln: br %r14 + +function %icmp_ult_i64_ext32(i64, i32) -> b1 { +block0(v0: i64, v1: i32): + v2 = uextend.i64 v1 + v3 = icmp.i64 ult v0, v2 + return v3 +} + +; check: clgfr %r2, %r3 +; nextln: lhi %r2, 0 +; nextln: lochil %r2, 1 +; nextln: br %r14 + +function %icmp_ult_i64_imm(i64) -> b1 { +block0(v0: i64): + v1 = iconst.i64 1 + v2 = icmp.i64 ult v0, v1 + return v2 +} + +; check: clgfi %r2, 1 +; nextln: lhi %r2, 0 +; nextln: lochil %r2, 1 +; nextln: br %r14 + +function %icmp_ult_i64_mem(i64, i64) -> b1 { +block0(v0: i64, v1: i64): + v2 = load.i64 v1 + v3 = icmp.i64 ult v0, v2 + return v3 +} + +; check: clg %r2, 0(%r3) +; nextln: lhi %r2, 0 +; nextln: lochil %r2, 1 +; nextln: br %r14 + +function %icmp_ult_i64_sym(i64) -> b1 { + gv0 = symbol colocated %sym +block0(v0: i64): + v1 = symbol_value.i64 gv0 + v2 = load.i64 v1 + v3 = icmp.i64 ult v0, v2 + return v3 +} + +; check: clgrl %r2, %sym + 0 +; nextln: lhi %r2, 0 +; nextln: lochil %r2, 1 +; nextln: br %r14 + +function %icmp_ult_i64_mem_ext32(i64, i64) -> b1 { +block0(v0: i64, v1: i64): + v2 = uload32.i64 v1 + v3 = icmp.i64 ult v0, v2 + return v3 +} + +; check: clgf %r2, 0(%r3) +; nextln: lhi %r2, 0 +; nextln: lochil %r2, 1 +; nextln: br %r14 + +function %icmp_ult_i64_sym_ext32(i64) -> b1 { + gv0 = symbol colocated %sym +block0(v0: i64): + v1 = symbol_value.i64 gv0 + v2 = uload32.i64 v1 + v3 = icmp.i64 ult v0, v2 + return v3 +} + +; check: clgfrl %r2, %sym + 0 +; nextln: lhi %r2, 0 +; nextln: lochil %r2, 1 +; nextln: br %r14 + +function %icmp_ult_i64_sym_ext16(i64) -> b1 { + gv0 = symbol colocated %sym 
+block0(v0: i64): + v1 = symbol_value.i64 gv0 + v2 = uload16.i64 v1 + v3 = icmp.i64 ult v0, v2 + return v3 +} + +; check: clghrl %r2, %sym + 0 +; nextln: lhi %r2, 0 +; nextln: lochil %r2, 1 +; nextln: br %r14 + +function %icmp_ult_i32(i32, i32) -> b1 { +block0(v0: i32, v1: i32): + v2 = icmp.i32 ult v0, v1 + return v2 +} + +; check: clr %r2, %r3 +; nextln: lhi %r2, 0 +; nextln: lochil %r2, 1 +; nextln: br %r14 + +function %icmp_ult_i32_imm(i32) -> b1 { +block0(v0: i32): + v1 = iconst.i32 1 + v2 = icmp.i32 ult v0, v1 + return v2 +} + +; check: clfi %r2, 1 +; nextln: lhi %r2, 0 +; nextln: lochil %r2, 1 +; nextln: br %r14 + +function %icmp_ult_i32_mem(i32, i64) -> b1 { +block0(v0: i32, v1: i64): + v2 = load.i32 v1 + v3 = icmp.i32 ult v0, v2 + return v3 +} + +; check: cl %r2, 0(%r3) +; nextln: lhi %r2, 0 +; nextln: lochil %r2, 1 +; nextln: br %r14 + +function %icmp_ult_i32_memoff(i32, i64) -> b1 { +block0(v0: i32, v1: i64): + v2 = load.i32 v1+4096 + v3 = icmp.i32 ult v0, v2 + return v3 +} + +; check: cly %r2, 4096(%r3) +; nextln: lhi %r2, 0 +; nextln: lochil %r2, 1 +; nextln: br %r14 + +function %icmp_ult_i32_sym(i32) -> b1 { + gv0 = symbol colocated %sym +block0(v0: i32): + v1 = symbol_value.i64 gv0 + v2 = load.i32 v1 + v3 = icmp.i32 ult v0, v2 + return v3 +} + +; check: clrl %r2, %sym + 0 +; nextln: lhi %r2, 0 +; nextln: lochil %r2, 1 +; nextln: br %r14 + +function %icmp_ult_i32_sym_ext16(i32) -> b1 { + gv0 = symbol colocated %sym +block0(v0: i32): + v1 = symbol_value.i64 gv0 + v2 = uload16.i32 v1 + v3 = icmp.i32 ult v0, v2 + return v3 +} + +; check: clhrl %r2, %sym + 0 +; nextln: lhi %r2, 0 +; nextln: lochil %r2, 1 +; nextln: br %r14 + +function %icmp_ult_i16(i16, i16) -> b1 { +block0(v0: i16, v1: i16): + v2 = icmp.i16 ult v0, v1 + return v2 +} + +; check: llhr %r2, %r2 +; nextln: llhr %r3, %r3 +; nextln: clr %r2, %r3 +; nextln: lhi %r2, 0 +; nextln: lochil %r2, 1 +; nextln: br %r14 + +function %icmp_ult_i16_imm(i16) -> b1 { +block0(v0: i16): + v1 = iconst.i16 1 + v2 = icmp.i16 ult v0, v1 + return v2 +} + +; check: llhr %r2, %r2 +; nextln: clfi %r2, 1 +; nextln: lhi %r2, 0 +; nextln: lochil %r2, 1 +; nextln: br %r14 + +function %icmp_ult_i16_mem(i16, i64) -> b1 { +block0(v0: i16, v1: i64): + v2 = load.i16 v1 + v3 = icmp.i16 ult v0, v2 + return v3 +} + +; check: llhr %r2, %r2 +; nextln: llh %r3, 0(%r3) +; nextln: clr %r2, %r3 +; nextln: lhi %r2, 0 +; nextln: lochil %r2, 1 +; nextln: br %r14 + +function %icmp_ult_i16_sym(i16) -> b1 { + gv0 = symbol colocated %sym +block0(v0: i16): + v1 = symbol_value.i64 gv0 + v2 = load.i16 v1 + v3 = icmp.i16 ult v0, v2 + return v3 +} + +; check: llhr %r2, %r2 +; nextln: clhrl %r2, %sym + 0 +; nextln: lhi %r2, 0 +; nextln: lochil %r2, 1 +; nextln: br %r14 + +function %icmp_ult_i8(i8, i8) -> b1 { +block0(v0: i8, v1: i8): + v2 = icmp.i8 ult v0, v1 + return v2 +} + +; check: llcr %r2, %r2 +; nextln: llcr %r3, %r3 +; nextln: clr %r2, %r3 +; nextln: lhi %r2, 0 +; nextln: lochil %r2, 1 +; nextln: br %r14 + +function %icmp_ult_i8_imm(i8) -> b1 { +block0(v0: i8): + v1 = iconst.i8 1 + v2 = icmp.i8 ult v0, v1 + return v2 +} + +; check: llcr %r2, %r2 +; nextln: clfi %r2, 1 +; nextln: lhi %r2, 0 +; nextln: lochil %r2, 1 +; nextln: br %r14 + +function %icmp_ult_i8_mem(i8, i64) -> b1 { +block0(v0: i8, v1: i64): + v2 = load.i8 v1 + v3 = icmp.i8 ult v0, v2 + return v3 +} + +; check: llcr %r2, %r2 +; nextln: llc %r3, 0(%r3) +; nextln: clr %r2, %r3 +; nextln: lhi %r2, 0 +; nextln: lochil %r2, 1 +; nextln: br %r14 + diff --git
a/cranelift/filetests/filetests/isa/s390x/jumptable.clif b/cranelift/filetests/filetests/isa/s390x/jumptable.clif new file mode 100644 index 0000000000..1930c0367a --- /dev/null +++ b/cranelift/filetests/filetests/isa/s390x/jumptable.clif @@ -0,0 +1,45 @@ +test compile +target s390x + +function %f(i64) -> i64 { + jt0 = jump_table [block1, block2, block3] + +block0(v0: i64): + br_table v0, block4, jt0 + +block1: + v1 = iconst.i64 1 + jump block5(v1) + +block2: + v2 = iconst.i64 2 + jump block5(v2) + +block3: + v3 = iconst.i64 3 + jump block5(v3) + +block4: + v4 = iconst.i64 4 + jump block5(v4) + +block5(v5: i64): + v6 = iadd.i64 v0, v5 + return v6 +} + +; check: clgfi %r2, 3 ; jghe label1 ; sllg %r4, %r2, 2 ; larl %r3, 18 ; lgf %r4, 0(%r4, %r3) ; agrk %r3, %r3, %r4 ; br %r3 ; jt_entries + +; check: lghi %r3, 1 +; nextln: jg + +; check: lghi %r3, 2 +; nextln: jg + +; check: lghi %r3, 3 +; nextln: jg + +; check: agr %r2, %r3 +; nextln: br %r14 + + diff --git a/cranelift/filetests/filetests/isa/s390x/load-little.clif b/cranelift/filetests/filetests/isa/s390x/load-little.clif new file mode 100644 index 0000000000..6561863d73 --- /dev/null +++ b/cranelift/filetests/filetests/isa/s390x/load-little.clif @@ -0,0 +1,258 @@ +test compile +target s390x + +function %load_i64(i64) -> i64 { +block0(v0: i64): + v1 = load.i64 little v0 + return v1 +} + +; check: lrvg %r2, 0(%r2) +; nextln: br %r14 + +function %load_i64_sym() -> i64 { + gv0 = symbol colocated %sym +block0: + v0 = symbol_value.i64 gv0 + v1 = load.i64 little v0 + return v1 +} + +; check: larl %r1, %sym + 0 ; lrvg %r2, 0(%r1) +; nextln: br %r14 + +function %uload8_i64(i64) -> i64 { +block0(v0: i64): + v1 = uload8.i64 little v0 + return v1 +} + +; check: llgc %r2, 0(%r2) +; nextln: br %r14 + +function %sload8_i64(i64) -> i64 { +block0(v0: i64): + v1 = sload8.i64 little v0 + return v1 +} + +; check: lgb %r2, 0(%r2) +; nextln: br %r14 + +function %uload16_i64(i64) -> i64 { +block0(v0: i64): + v1 = uload16.i64 little v0 + return v1 +} + +; check: lrvh %r2, 0(%r2) +; nextln: llghr %r2, %r2 +; nextln: br %r14 + +function %uload16_i64_sym() -> i64 { + gv0 = symbol colocated %sym +block0: + v0 = symbol_value.i64 gv0 + v1 = uload16.i64 little v0 + return v1 +} + +; check: larl %r1, %sym + 0 ; lrvh %r2, 0(%r1) +; nextln: llghr %r2, %r2 +; nextln: br %r14 + +function %sload16_i64(i64) -> i64 { +block0(v0: i64): + v1 = sload16.i64 little v0 + return v1 +} + +; check: lrvh %r2, 0(%r2) +; nextln: lghr %r2, %r2 +; nextln: br %r14 + +function %sload16_i64_sym() -> i64 { + gv0 = symbol colocated %sym +block0: + v0 = symbol_value.i64 gv0 + v1 = sload16.i64 little v0 + return v1 +} + +; check: larl %r1, %sym + 0 ; lrvh %r2, 0(%r1) +; nextln: lghr %r2, %r2 +; nextln: br %r14 + +function %uload32_i64(i64) -> i64 { +block0(v0: i64): + v1 = uload32.i64 little v0 + return v1 +} + +; check: lrv %r2, 0(%r2) +; nextln: llgfr %r2, %r2 +; nextln: br %r14 + +function %uload32_i64_sym() -> i64 { + gv0 = symbol colocated %sym +block0: + v0 = symbol_value.i64 gv0 + v1 = uload32.i64 little v0 + return v1 +} + +; check: larl %r1, %sym + 0 ; lrv %r2, 0(%r1) +; nextln: llgfr %r2, %r2 +; nextln: br %r14 + +function %sload32_i64(i64) -> i64 { +block0(v0: i64): + v1 = sload32.i64 little v0 + return v1 +} + +; check: lrv %r2, 0(%r2) +; nextln: lgfr %r2, %r2 +; nextln: br %r14 + +function %sload32_i64_sym() -> i64 { + gv0 = symbol colocated %sym +block0: + v0 = symbol_value.i64 gv0 + v1 = sload32.i64 little v0 + return v1 +} + +; check: larl %r1, %sym + 0 ; lrv %r2, 0(%r1) +; nextln: 
lgfr %r2, %r2 +; nextln: br %r14 + +function %load_i32(i64) -> i32 { +block0(v0: i64): + v1 = load.i32 little v0 + return v1 +} + +; check: lrv %r2, 0(%r2) +; nextln: br %r14 + +function %load_i32_sym() -> i32 { + gv0 = symbol colocated %sym +block0: + v0 = symbol_value.i64 gv0 + v1 = load.i32 little v0 + return v1 +} + +; check: larl %r1, %sym + 0 ; lrv %r2, 0(%r1) +; nextln: br %r14 + +function %uload8_i32(i64) -> i32 { +block0(v0: i64): + v1 = uload8.i32 little v0 + return v1 +} + +; check: llc %r2, 0(%r2) +; nextln: br %r14 + +function %sload8_i32(i64) -> i32 { +block0(v0: i64): + v1 = sload8.i32 little v0 + return v1 +} + +; check: lb %r2, 0(%r2) +; nextln: br %r14 + +function %uload16_i32(i64) -> i32 { +block0(v0: i64): + v1 = uload16.i32 little v0 + return v1 +} + +; check: lrvh %r2, 0(%r2) +; nextln: llhr %r2, %r2 +; nextln: br %r14 + +function %uload16_i32_sym() -> i32 { + gv0 = symbol colocated %sym +block0: + v0 = symbol_value.i64 gv0 + v1 = uload16.i32 little v0 + return v1 +} + +; check: larl %r1, %sym + 0 ; lrvh %r2, 0(%r1) +; nextln: llhr %r2, %r2 +; nextln: br %r14 + +function %sload16_i32(i64) -> i32 { +block0(v0: i64): + v1 = sload16.i32 little v0 + return v1 +} + +; check: lrvh %r2, 0(%r2) +; nextln: lhr %r2, %r2 +; nextln: br %r14 + +function %sload16_i32_sym() -> i32 { + gv0 = symbol colocated %sym +block0: + v0 = symbol_value.i64 gv0 + v1 = sload16.i32 little v0 + return v1 +} + +; check: larl %r1, %sym + 0 ; lrvh %r2, 0(%r1) +; nextln: lhr %r2, %r2 +; nextln: br %r14 + +function %load_i16(i64) -> i16 { +block0(v0: i64): + v1 = load.i16 little v0 + return v1 +} + +; check: lrvh %r2, 0(%r2) +; nextln: br %r14 + +function %load_i16_sym() -> i16 { + gv0 = symbol colocated %sym +block0: + v0 = symbol_value.i64 gv0 + v1 = load.i16 little v0 + return v1 +} + +; check: larl %r1, %sym + 0 ; lrvh %r2, 0(%r1) +; nextln: br %r14 + +function %uload8_i16(i64) -> i16 { +block0(v0: i64): + v1 = uload8.i16 little v0 + return v1 +} + +; check: llc %r2, 0(%r2) +; nextln: br %r14 + +function %sload8_i16(i64) -> i16 { +block0(v0: i64): + v1 = sload8.i16 little v0 + return v1 +} + +; check: lb %r2, 0(%r2) +; nextln: br %r14 + +function %load_i8(i64) -> i8 { +block0(v0: i64): + v1 = load.i8 little v0 + return v1 +} + +; check: llc %r2, 0(%r2) +; nextln: br %r14 + diff --git a/cranelift/filetests/filetests/isa/s390x/load.clif b/cranelift/filetests/filetests/isa/s390x/load.clif new file mode 100644 index 0000000000..8d46fe0867 --- /dev/null +++ b/cranelift/filetests/filetests/isa/s390x/load.clif @@ -0,0 +1,264 @@ +test compile +target s390x + +function %load_i64(i64) -> i64 { +block0(v0: i64): + v1 = load.i64 v0 + return v1 +} + +; check: lg %r2, 0(%r2) +; nextln: br %r14 + +function %load_i64_sym() -> i64 { + gv0 = symbol colocated %sym +block0: + v0 = symbol_value.i64 gv0 + v1 = load.i64 v0 + return v1 +} + +; check: lgrl %r2, %sym + 0 +; nextln: br %r14 + +function %uload8_i64(i64) -> i64 { +block0(v0: i64): + v1 = uload8.i64 v0 + return v1 +} + +; check: llgc %r2, 0(%r2) +; nextln: br %r14 + +function %sload8_i64(i64) -> i64 { +block0(v0: i64): + v1 = sload8.i64 v0 + return v1 +} + +; check: lgb %r2, 0(%r2) +; nextln: br %r14 + +function %uload16_i64(i64) -> i64 { +block0(v0: i64): + v1 = uload16.i64 v0 + return v1 +} + +; check: llgh %r2, 0(%r2) +; nextln: br %r14 + +function %uload16_i64_sym() -> i64 { + gv0 = symbol colocated %sym +block0: + v0 = symbol_value.i64 gv0 + v1 = uload16.i64 v0 + return v1 +} + +; check: llghrl %r2, %sym + 0 +; nextln: br %r14 + +function %sload16_i64(i64) 
-> i64 { +block0(v0: i64): + v1 = sload16.i64 v0 + return v1 +} + +; check: lgh %r2, 0(%r2) +; nextln: br %r14 + +function %sload16_i64_sym() -> i64 { + gv0 = symbol colocated %sym +block0: + v0 = symbol_value.i64 gv0 + v1 = sload16.i64 v0 + return v1 +} + +; check: lghrl %r2, %sym + 0 +; nextln: br %r14 + +function %uload32_i64(i64) -> i64 { +block0(v0: i64): + v1 = uload32.i64 v0 + return v1 +} + +; check: llgf %r2, 0(%r2) +; nextln: br %r14 + +function %uload32_i64_sym() -> i64 { + gv0 = symbol colocated %sym +block0: + v0 = symbol_value.i64 gv0 + v1 = uload32.i64 v0 + return v1 +} + +; check: llgfrl %r2, %sym + 0 +; nextln: br %r14 + +function %sload32_i64(i64) -> i64 { +block0(v0: i64): + v1 = sload32.i64 v0 + return v1 +} + +; check: lgf %r2, 0(%r2) +; nextln: br %r14 + +function %sload32_i64_sym() -> i64 { + gv0 = symbol colocated %sym +block0: + v0 = symbol_value.i64 gv0 + v1 = sload32.i64 v0 + return v1 +} + +; check: lgfrl %r2, %sym + 0 +; nextln: br %r14 + +function %load_i32(i64) -> i32 { +block0(v0: i64): + v1 = load.i32 v0 + return v1 +} + +; check: l %r2, 0(%r2) +; nextln: br %r14 + +function %load_i32_sym() -> i32 { + gv0 = symbol colocated %sym +block0: + v0 = symbol_value.i64 gv0 + v1 = load.i32 v0 + return v1 +} + +; check: lrl %r2, %sym + 0 +; nextln: br %r14 + +function %load_i32_off(i64) -> i32 { +block0(v0: i64): + v1 = load.i32 v0+4096 + return v1 +} + +; check: ly %r2, 4096(%r2) +; nextln: br %r14 + +function %uload8_i32(i64) -> i32 { +block0(v0: i64): + v1 = uload8.i32 v0 + return v1 +} + +; check: llc %r2, 0(%r2) +; nextln: br %r14 + +function %sload8_i32(i64) -> i32 { +block0(v0: i64): + v1 = sload8.i32 v0 + return v1 +} + +; check: lb %r2, 0(%r2) +; nextln: br %r14 + +function %uload16_i32(i64) -> i32 { +block0(v0: i64): + v1 = uload16.i32 v0 + return v1 +} + +; check: llh %r2, 0(%r2) +; nextln: br %r14 + +function %uload16_i32_sym() -> i32 { + gv0 = symbol colocated %sym +block0: + v0 = symbol_value.i64 gv0 + v1 = uload16.i32 v0 + return v1 +} + +; check: llhrl %r2, %sym + 0 +; nextln: br %r14 + +function %sload16_i32(i64) -> i32 { +block0(v0: i64): + v1 = sload16.i32 v0 + return v1 +} + +; check: lh %r2, 0(%r2) +; nextln: br %r14 + +function %sload16_i32_off(i64) -> i32 { +block0(v0: i64): + v1 = sload16.i32 v0+4096 + return v1 +} + +; check: lhy %r2, 4096(%r2) +; nextln: br %r14 + +function %sload16_i32_sym() -> i32 { + gv0 = symbol colocated %sym +block0: + v0 = symbol_value.i64 gv0 + v1 = sload16.i32 v0 + return v1 +} + +; check: lhrl %r2, %sym + 0 +; nextln: br %r14 + +function %load_i16(i64) -> i16 { +block0(v0: i64): + v1 = load.i16 v0 + return v1 +} + +; check: llh %r2, 0(%r2) +; nextln: br %r14 + +function %load_i16_sym() -> i16 { + gv0 = symbol colocated %sym +block0: + v0 = symbol_value.i64 gv0 + v1 = load.i16 v0 + return v1 +} + +; check: llhrl %r2, %sym + 0 +; nextln: br %r14 + +function %uload8_i16(i64) -> i16 { +block0(v0: i64): + v1 = uload8.i16 v0 + return v1 +} + +; check: llc %r2, 0(%r2) +; nextln: br %r14 + +function %sload8_i16(i64) -> i16 { +block0(v0: i64): + v1 = sload8.i16 v0 + return v1 +} + +; check: lb %r2, 0(%r2) +; nextln: br %r14 + +function %load_i8(i64) -> i8 { +block0(v0: i64): + v1 = load.i8 v0 + return v1 +} + +; check: llc %r2, 0(%r2) +; nextln: br %r14 + diff --git a/cranelift/filetests/filetests/isa/s390x/multivalue-ret.clif b/cranelift/filetests/filetests/isa/s390x/multivalue-ret.clif new file mode 100644 index 0000000000..d9197d3072 --- /dev/null +++ b/cranelift/filetests/filetests/isa/s390x/multivalue-ret.clif @@ -0,0 
+1,79 @@ +test compile +target s390x + +;; Test default (non-SpiderMonkey) ABI. +function %f1() -> i64, i64, i64, i64 { +block1: + v0 = iconst.i64 1 + v1 = iconst.i64 2 + v2 = iconst.i64 3 + v3 = iconst.i64 4 + return v0, v1, v2, v3 +} + +; check: lghi %r2, 1 +; nextln: lghi %r3, 2 +; nextln: lghi %r4, 3 +; nextln: lghi %r5, 4 +; nextln: br %r14 + +function %f2() -> i64, i64, i64, i64, i64, i64 { +block1: + v0 = iconst.i64 1 + v1 = iconst.i64 2 + v2 = iconst.i64 3 + v3 = iconst.i64 4 + v4 = iconst.i64 5 + v5 = iconst.i64 6 + return v0, v1, v2, v3, v4, v5 +} + +; check: stmg %r12, %r15, 96(%r15) +; nextln: lgr %r14, %r2 +; nextln: lghi %r2, 1 +; nextln: lghi %r3, 2 +; nextln: lghi %r4, 3 +; nextln: lghi %r5, 4 +; nextln: lghi %r13, 5 +; nextln: lghi %r12, 6 +; nextln: stg %r13, 0(%r14) +; nextln: stg %r12, 8(%r14) +; nextln: lmg %r12, %r15, 96(%r15) +; nextln: br %r14 + +;; Test default (non-SpiderMonkey) ABI. +function %f3() -> f64, f64, f64, f64 { +block1: + v0 = f64const 0x0.0 + v1 = f64const 0x1.0 + v2 = f64const 0x2.0 + v3 = f64const 0x3.0 + return v0, v1, v2, v3 +} + +; check: bras %r1, 12 ; data.f64 0 ; ld %f0, 0(%r1) +; nextln: bras %r1, 12 ; data.f64 1 ; ld %f2, 0(%r1) +; nextln: bras %r1, 12 ; data.f64 2 ; ld %f4, 0(%r1) +; nextln: bras %r1, 12 ; data.f64 3 ; ld %f6, 0(%r1) +; nextln: br %r14 + +function %f4() -> f64, f64, f64, f64, f64, f64 { +block1: + v0 = f64const 0x0.0 + v1 = f64const 0x1.0 + v2 = f64const 0x2.0 + v3 = f64const 0x3.0 + v4 = f64const 0x4.0 + v5 = f64const 0x5.0 + return v0, v1, v2, v3, v4, v5 +} + +; check: bras %r1, 12 ; data.f64 0 ; ld %f0, 0(%r1) +; nextln: bras %r1, 12 ; data.f64 1 ; ld %f2, 0(%r1) +; nextln: bras %r1, 12 ; data.f64 2 ; ld %f4, 0(%r1) +; nextln: bras %r1, 12 ; data.f64 3 ; ld %f6, 0(%r1) +; nextln: bras %r1, 12 ; data.f64 4 ; ld %f1, 0(%r1) +; nextln: bras %r1, 12 ; data.f64 5 ; ld %f3, 0(%r1) +; nextln: std %f1, 0(%r2) +; nextln: std %f3, 8(%r2) +; nextln: br %r14 diff --git a/cranelift/filetests/filetests/isa/s390x/reftypes.clif b/cranelift/filetests/filetests/isa/s390x/reftypes.clif new file mode 100644 index 0000000000..0ffdcab20e --- /dev/null +++ b/cranelift/filetests/filetests/isa/s390x/reftypes.clif @@ -0,0 +1,101 @@ +test compile +target s390x + +function %f0(r64, r64) -> r64 { +block0(v0: r64, v1: r64): + return v1 +} + +; check: lgr %r2, %r3 +; nextln: br %r14 + +function %f1(r64) -> b1 { +block0(v0: r64): + v1 = is_null v0 + return v1 +} + +; check: cghi %r2, 0 +; nextln: lhi %r2, 0 +; nextln: lochie %r2, 1 +; nextln: br %r14 + +function %f2(r64) -> b1 { +block0(v0: r64): + v1 = is_invalid v0 + return v1 +} + +; check: cghi %r2, -1 +; nextln: lhi %r2, 0 +; nextln: lochie %r2, 1 +; nextln: br %r14 + +function %f3() -> r64 { +block0: + v0 = null.r64 + return v0 +} + +; check: lghi %r2, 0 +; nextln: br %r14 + +function %f4(r64, r64) -> r64, r64, r64 { + fn0 = %f(r64) -> b1 + ss0 = explicit_slot 8 + +block0(v0: r64, v1: r64): + v2 = call fn0(v0) + stack_store.r64 v0, ss0 + brz v2, block1(v1, v0) + jump block2(v0, v1) + +block1(v3: r64, v4: r64): + jump block3(v3, v4) + +block2(v5: r64, v6: r64): + jump block3(v5, v6) + +block3(v7: r64, v8: r64): + v9 = stack_load.r64 ss0 + return v7, v8, v9 +} + +; check: Block 0: +; check: stmg %r12, %r15, 96(%r15) +; nextln: aghi %r15, -192 +; nextln: virtual_sp_offset_adjust 160 +; nextln: lgr %r13, %r2 +; nextln: lgr %r12, %r3 +; nextln: lgr %r2, %r13 +; nextln: bras %r1, 12 ; data %f + 0 ; lg %r3, 0(%r1) +; nextln: stg %r2, 168(%r15) +; nextln: stg %r13, 176(%r15) +; nextln: stg %r12,
184(%r15) +; nextln: (safepoint: slots [S0, S1, S2] +; nextln: basr %r14, %r3 +; nextln: lg %r13, 176(%r15) +; nextln: lg %r12, 184(%r15) +; nextln: la %r3, 160(%r15) +; nextln: stg %r13, 0(%r3) +; nextln: llcr %r2, %r2 +; nextln: chi %r2, 0 +; nextln: jgnlh label1 ; jg label3 +; check: Block 1: +; check: jg label2 +; check: Block 2: +; check: lgr %r2, %r12 +; nextln: jg label5 +; check: Block 3: +; check: jg label4 +; check: Block 4: +; check: lgr %r2, %r13 +; nextln: lgr %r13, %r12 +; nextln: jg label5 +; check: Block 5: +; check: la %r3, 160(%r15) +; nextln: lg %r3, 0(%r3) +; nextln: lgr %r4, %r3 +; nextln: lgr %r3, %r13 +; nextln: lmg %r12, %r15, 288(%r15) +; nextln: br %r14 diff --git a/cranelift/filetests/filetests/isa/s390x/saturating-ops.clif b/cranelift/filetests/filetests/isa/s390x/saturating-ops.clif new file mode 100644 index 0000000000..193a02eaad --- /dev/null +++ b/cranelift/filetests/filetests/isa/s390x/saturating-ops.clif @@ -0,0 +1,12 @@ +test compile +target s390x + +; FIXME: not yet supported + +function %uaddsat64(i64, i64) -> i64 { +block0(v0: i64, v1: i64): +; v2 = uadd_sat.i64 v0, v1 + v2 = iconst.i64 0 + return v2 +} + diff --git a/cranelift/filetests/filetests/isa/s390x/shift-rotate.clif b/cranelift/filetests/filetests/isa/s390x/shift-rotate.clif new file mode 100644 index 0000000000..a275b997c8 --- /dev/null +++ b/cranelift/filetests/filetests/isa/s390x/shift-rotate.clif @@ -0,0 +1,461 @@ +test compile +target s390x + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; ROTR +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +function %rotr_i64_reg(i64, i64) -> i64 { +block0(v0: i64, v1: i64): + v2 = rotr.i64 v0, v1 + return v2 +} + +; check: lcgr %r3, %r3 +; nextln: rllg %r2, %r2, 0(%r3) +; nextln: br %r14 + +function %rotr_i64_imm(i64) -> i64 { +block0(v0: i64): + v1 = iconst.i32 17 + v2 = rotr.i64 v0, v1 + return v2 +} + +; check: rllg %r2, %r2, 47 +; nextln: br %r14 + +function %rotr_i32_reg(i32, i32) -> i32 { +block0(v0: i32, v1: i32): + v2 = rotr.i32 v0, v1 + return v2 +} + +; check: lcr %r3, %r3 +; nextln: rll %r2, %r2, 0(%r3) +; nextln: br %r14 + +function %rotr_i32_imm(i32) -> i32 { +block0(v0: i32): + v1 = iconst.i32 17 + v2 = rotr.i32 v0, v1 + return v2 +} + +; check: rll %r2, %r2, 15 +; nextln: br %r14 + +function %rotr_i16_reg(i16, i16) -> i16 { +block0(v0: i16, v1: i16): + v2 = rotr.i16 v0, v1 + return v2 +} + +; check: llhr %r2, %r2 +; nextln: lr %r5, %r3 +; nextln: lcr %r4, %r3 +; nextln: nill %r5, 15 +; nextln: nill %r4, 15 +; nextln: sllk %r3, %r2, 0(%r5) +; nextln: srlk %r2, %r2, 0(%r4) +; nextln: ork %r2, %r3, %r2 +; nextln: br %r14 + +function %rotr_i16_imm(i16) -> i16 { +block0(v0: i16): + v1 = iconst.i32 10 + v2 = rotr.i16 v0, v1 + return v2 +} + +; check: llhr %r2, %r2 +; nextln: sllk %r3, %r2, 6 +; nextln: srlk %r2, %r2, 10 +; nextln: ork %r2, %r3, %r2 +; nextln: br %r14 + +function %rotr_i8_reg(i8, i8) -> i8 { +block0(v0: i8, v1: i8): + v2 = rotr.i8 v0, v1 + return v2 +} + +; check: llcr %r2, %r2 +; nextln: lr %r5, %r3 +; nextln: lcr %r4, %r3 +; nextln: nill %r5, 7 +; nextln: nill %r4, 7 +; nextln: sllk %r3, %r2, 0(%r5) +; nextln: srlk %r2, %r2, 0(%r4) +; nextln: ork %r2, %r3, %r2 +; nextln: br %r14 + +function %rotr_i8_imm(i8) -> i8 { +block0(v0: i8): + v1 = iconst.i32 3 + v2 = rotr.i8 v0, v1 + return v2 +} + +; check: llcr %r2, %r2 +; nextln: sllk %r3, %r2, 5 +; nextln: srlk %r2, %r2, 3 +; nextln: ork %r2, %r3, %r2 +; nextln: br %r14 + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; ROTL +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + 
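+; Note: s390x provides only rotate-left instructions (rll/rllg), so rotl maps directly, while rotr above is lowered as a rotate left by the negated count (lcgr/lcr); i16/i8 rotates have no hardware equivalent and are synthesized from a shift pair plus ork. + 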
+function %rotl_i64_reg(i64, i64) -> i64 { +block0(v0: i64, v1: i64): + v2 = rotl.i64 v0, v1 + return v2 +} + +; check: rllg %r2, %r2, 0(%r3) +; nextln: br %r14 + +function %rotl_i64_imm(i64) -> i64 { +block0(v0: i64): + v1 = iconst.i32 17 + v2 = rotl.i64 v0, v1 + return v2 +} + +; check: rllg %r2, %r2, 17 +; nextln: br %r14 + +function %rotl_i32_reg(i32, i32) -> i32 { +block0(v0: i32, v1: i32): + v2 = rotl.i32 v0, v1 + return v2 +} + +; check: rll %r2, %r2, 0(%r3) +; nextln: br %r14 + +function %rotl_i32_imm(i32) -> i32 { +block0(v0: i32): + v1 = iconst.i32 17 + v2 = rotl.i32 v0, v1 + return v2 +} + +; check: rll %r2, %r2, 17 +; nextln: br %r14 + +function %rotl_i16_reg(i16, i16) -> i16 { +block0(v0: i16, v1: i16): + v2 = rotl.i16 v0, v1 + return v2 +} + +; check: llhr %r2, %r2 +; nextln: lr %r4, %r3 +; nextln: lcr %r3, %r3 +; nextln: nill %r4, 15 +; nextln: nill %r3, 15 +; nextln: sllk %r3, %r2, 0(%r3) +; nextln: srlk %r2, %r2, 0(%r4) +; nextln: ork %r2, %r3, %r2 +; nextln: br %r14 + +function %rotl_i16_imm(i16) -> i16 { +block0(v0: i16): + v1 = iconst.i32 10 + v2 = rotl.i16 v0, v1 + return v2 +} + +; check: llhr %r2, %r2 +; nextln: sllk %r3, %r2, 10 +; nextln: srlk %r2, %r2, 6 +; nextln: ork %r2, %r3, %r2 +; nextln: br %r14 + +function %rotl_i8_reg(i8, i8) -> i8 { +block0(v0: i8, v1: i8): + v2 = rotl.i8 v0, v1 + return v2 +} + +; check: llcr %r2, %r2 +; nextln: lr %r4, %r3 +; nextln: lcr %r3, %r3 +; nextln: nill %r4, 7 +; nextln: nill %r3, 7 +; nextln: sllk %r3, %r2, 0(%r3) +; nextln: srlk %r2, %r2, 0(%r4) +; nextln: ork %r2, %r3, %r2 +; nextln: br %r14 + +function %rotl_i8_imm(i8) -> i8 { +block0(v0: i8): + v1 = iconst.i32 3 + v2 = rotl.i8 v0, v1 + return v2 +} + +; check: llcr %r2, %r2 +; nextln: sllk %r3, %r2, 3 +; nextln: srlk %r2, %r2, 5 +; nextln: ork %r2, %r3, %r2 +; nextln: br %r14 + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; USHR +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +function %ushr_i64_reg(i64, i64) -> i64 { +block0(v0: i64, v1: i64): + v2 = ushr.i64 v0, v1 + return v2 +} + +; check: srlg %r2, %r2, 0(%r3) +; nextln: br %r14 + +function %ushr_i64_imm(i64) -> i64 { +block0(v0: i64): + v1 = iconst.i32 17 + v2 = ushr.i64 v0, v1 + return v2 +} + +; check: srlg %r2, %r2, 17 +; nextln: br %r14 + +function %ushr_i32_reg(i32, i32) -> i32 { +block0(v0: i32, v1: i32): + v2 = ushr.i32 v0, v1 + return v2 +} + +; check: srlk %r2, %r2, 0(%r3) +; nextln: br %r14 + +function %ushr_i32_imm(i32) -> i32 { +block0(v0: i32): + v1 = iconst.i32 17 + v2 = ushr.i32 v0, v1 + return v2 +} + +; check: srlk %r2, %r2, 17 +; nextln: br %r14 + +function %ushr_i16_reg(i16, i16) -> i16 { +block0(v0: i16, v1: i16): + v2 = ushr.i16 v0, v1 + return v2 +} + +; FIXME: should the shift count be masked to the type width?
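+; (The count is currently masked with nill %r3, 31, i.e. to the 32-bit shift range, so counts at or above the type width clear the zero-extended value rather than wrapping, which may not match CLIF's mod-bit-width shift semantics.)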
+ +; check: llhr %r2, %r2 +; nextln: nill %r3, 31 +; nextln: srlk %r2, %r2, 0(%r3) +; nextln: br %r14 + +function %ushr_i16_imm(i16) -> i16 { +block0(v0: i16): + v1 = iconst.i32 10 + v2 = ushr.i16 v0, v1 + return v2 +} + +; check: llhr %r2, %r2 +; nextln: srlk %r2, %r2, 10 +; nextln: br %r14 + +function %ushr_i8_reg(i8, i8) -> i8 { +block0(v0: i8, v1: i8): + v2 = ushr.i8 v0, v1 + return v2 +} + +; check: llcr %r2, %r2 +; nextln: nill %r3, 31 +; nextln: srlk %r2, %r2, 0(%r3) +; nextln: br %r14 + +function %ushr_i8_imm(i8) -> i8 { +block0(v0: i8): + v1 = iconst.i32 3 + v2 = ushr.i8 v0, v1 + return v2 +} + +; check: llcr %r2, %r2 +; nextln: srlk %r2, %r2, 3 +; nextln: br %r14 + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; ISHL +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +function %ishl_i64_reg(i64, i64) -> i64 { +block0(v0: i64, v1: i64): + v2 = ishl.i64 v0, v1 + return v2 +} + +; check: sllg %r2, %r2, 0(%r3) +; nextln: br %r14 + +function %ishl_i64_imm(i64) -> i64 { +block0(v0: i64): + v1 = iconst.i32 17 + v2 = ishl.i64 v0, v1 + return v2 +} + +; check: sllg %r2, %r2, 17 +; nextln: br %r14 + +function %ishl_i32_reg(i32, i32) -> i32 { +block0(v0: i32, v1: i32): + v2 = ishl.i32 v0, v1 + return v2 +} + +; check: sllk %r2, %r2, 0(%r3) +; nextln: br %r14 + +function %ishl_i32_imm(i32) -> i32 { +block0(v0: i32): + v1 = iconst.i32 17 + v2 = ishl.i32 v0, v1 + return v2 +} + +; check: sllk %r2, %r2, 17 +; nextln: br %r14 + +function %ishl_i16_reg(i16, i16) -> i16 { +block0(v0: i16, v1: i16): + v2 = ishl.i16 v0, v1 + return v2 +} + +; check: nill %r3, 31 +; nextln: sllk %r2, %r2, 0(%r3) +; nextln: br %r14 + +function %ishl_i16_imm(i16) -> i16 { +block0(v0: i16): + v1 = iconst.i32 10 + v2 = ishl.i16 v0, v1 + return v2 +} + +; check: sllk %r2, %r2, 10 +; nextln: br %r14 + +function %ishl_i8_reg(i8, i8) -> i8 { +block0(v0: i8, v1: i8): + v2 = ishl.i8 v0, v1 + return v2 +} + +; check: nill %r3, 31 +; nextln: sllk %r2, %r2, 0(%r3) +; nextln: br %r14 + +function %ishl_i8_imm(i8) -> i8 { +block0(v0: i8): + v1 = iconst.i32 3 + v2 = ishl.i8 v0, v1 + return v2 +} + +; check: sllk %r2, %r2, 3 +; nextln: br %r14 + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; SSHR +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +function %sshr_i64_reg(i64, i64) -> i64 { +block0(v0: i64, v1: i64): + v2 = sshr.i64 v0, v1 + return v2 +} + +; check: srag %r2, %r2, 0(%r3) +; nextln: br %r14 + +function %sshr_i64_imm(i64) -> i64 { +block0(v0: i64): + v1 = iconst.i32 17 + v2 = sshr.i64 v0, v1 + return v2 +} + +; check: srag %r2, %r2, 17 +; nextln: br %r14 + +function %sshr_i32_reg(i32, i32) -> i32 { +block0(v0: i32, v1: i32): + v2 = sshr.i32 v0, v1 + return v2 +} + +; check: srak %r2, %r2, 0(%r3) +; nextln: br %r14 + +function %sshr_i32_imm(i32) -> i32 { +block0(v0: i32): + v1 = iconst.i32 17 + v2 = sshr.i32 v0, v1 + return v2 +} + +; check: srak %r2, %r2, 17 +; nextln: br %r14 + +function %sshr_i16_reg(i16, i16) -> i16 { +block0(v0: i16, v1: i16): + v2 = sshr.i16 v0, v1 + return v2 +} + +; check: lhr %r2, %r2 +; nextln: nill %r3, 31 +; nextln: srak %r2, %r2, 0(%r3) +; nextln: br %r14 + +function %sshr_i16_imm(i16) -> i16 { +block0(v0: i16): + v1 = iconst.i32 10 + v2 = sshr.i16 v0, v1 + return v2 +} + +; check: lhr %r2, %r2 +; nextln: srak %r2, %r2, 10 +; nextln: br %r14 + +function %sshr_i8_reg(i8, i8) -> i8 { +block0(v0: i8, v1: i8): + v2 = sshr.i8 v0, v1 + return v2 +} + +; check: lbr %r2, %r2 +; nextln: nill %r3, 31 +; nextln: srak %r2, %r2, 0(%r3) +; nextln: br %r14 + +function %sshr_i8_imm(i8) -> i8 { +block0(v0: i8): + 
v1 = iconst.i32 3 + v2 = sshr.i8 v0, v1 + return v2 +} + +; check: lbr %r2, %r2 +; nextln: srak %r2, %r2, 3 +; nextln: br %r14 + diff --git a/cranelift/filetests/filetests/isa/s390x/stack-limit.clif b/cranelift/filetests/filetests/isa/s390x/stack-limit.clif new file mode 100644 index 0000000000..0ef7320340 --- /dev/null +++ b/cranelift/filetests/filetests/isa/s390x/stack-limit.clif @@ -0,0 +1,175 @@ +test compile +target s390x + +function %foo() { +block0: + return +} + +function %stack_limit_leaf_zero(i64 stack_limit) { +block0(v0: i64): + return +} + +; check: br %r14 + +function %stack_limit_gv_leaf_zero(i64 vmctx) { + gv0 = vmctx + gv1 = load.i64 notrap aligned gv0 + gv2 = load.i64 notrap aligned gv1+4 + stack_limit = gv2 +block0(v0: i64): + return +} + +; check: br %r14 + + +function %stack_limit_call_zero(i64 stack_limit) { + fn0 = %foo() +block0(v0: i64): + call fn0() + return +} + +; check: clgrtle %r15, %r2 +; nextln: stmg %r14, %r15, 112(%r15) +; nextln: aghi %r15, -160 +; nextln: virtual_sp_offset_adjust 160 +; nextln: bras %r1, 12 ; data %foo + 0 ; lg %r2, 0(%r1) +; nextln: basr %r14, %r2 +; nextln: lmg %r14, %r15, 272(%r15) +; nextln: br %r14 + +function %stack_limit_gv_call_zero(i64 vmctx) { + gv0 = vmctx + gv1 = load.i64 notrap aligned gv0 + gv2 = load.i64 notrap aligned gv1+4 + stack_limit = gv2 + fn0 = %foo() +block0(v0: i64): + call fn0() + return +} + +; check: lg %r1, 0(%r2) +; nextln: lg %r1, 4(%r1) +; nextln: clgrtle %r15, %r1 +; nextln: stmg %r14, %r15, 112(%r15) +; nextln: aghi %r15, -160 +; nextln: virtual_sp_offset_adjust 160 +; nextln: bras %r1, 12 ; data %foo + 0 ; lg %r2, 0(%r1) +; nextln: basr %r14, %r2 +; nextln: lmg %r14, %r15, 272(%r15) +; nextln: br %r14 + +function %stack_limit(i64 stack_limit) { + ss0 = explicit_slot 168 +block0(v0: i64): + return +} + +; check: la %r1, 168(%r2) +; nextln: clgrtle %r15, %r1 +; nextln: aghi %r15, -168 +; nextln: aghi %r15, 168 +; nextln: br %r14 + +function %large_stack_limit(i64 stack_limit) { + ss0 = explicit_slot 400000 +block0(v0: i64): + return +} + +; check: clgrtle %r15, %r2 +; nextln: lay %r1, 400000(%r2) +; nextln: clgrtle %r15, %r1 +; nextln: agfi %r15, -400000 +; nextln: agfi %r15, 400000 +; nextln: br %r14 + +function %huge_stack_limit(i64 stack_limit) { + ss0 = explicit_slot 4000000 +block0(v0: i64): + return +} + +; check: clgrtle %r15, %r2 +; nextln: lgr %r1, %r2 +; nextln: algfi %r1, 4000000 +; nextln: clgrtle %r15, %r1 +; nextln: agfi %r15, -4000000 +; nextln: agfi %r15, 4000000 +; nextln: br %r14 + +function %limit_preamble(i64 vmctx) { + gv0 = vmctx + gv1 = load.i64 notrap aligned gv0 + gv2 = load.i64 notrap aligned gv1+4 + stack_limit = gv2 + ss0 = explicit_slot 20 +block0(v0: i64): + return +} + +; check: lg %r1, 0(%r2) +; nextln: lg %r1, 4(%r1) +; nextln: la %r1, 24(%r1) +; nextln: clgrtle %r15, %r1 +; nextln: aghi %r15, -24 +; nextln: aghi %r15, 24 +; nextln: br %r14 + +function %limit_preamble_large(i64 vmctx) { + gv0 = vmctx + gv1 = load.i64 notrap aligned gv0 + gv2 = load.i64 notrap aligned gv1+4 + stack_limit = gv2 + ss0 = explicit_slot 400000 +block0(v0: i64): + return +} + +; check: lg %r1, 0(%r2) +; nextln: lg %r1, 4(%r1) +; nextln: clgrtle %r15, %r1 +; nextln: lay %r1, 400000(%r1) +; nextln: clgrtle %r15, %r1 +; nextln: agfi %r15, -400000 +; nextln: agfi %r15, 400000 +; nextln: br %r14 + +function %limit_preamble_huge(i64 vmctx) { + gv0 = vmctx + gv1 = load.i64 notrap aligned gv0 + gv2 = load.i64 notrap aligned gv1+4 + stack_limit = gv2 + ss0 = explicit_slot 4000000 +block0(v0: i64): + 
return +} + +; check: lg %r1, 0(%r2) +; nextln: lg %r1, 4(%r1) +; nextln: clgrtle %r15, %r1 +; nextln: algfi %r1, 4000000 +; nextln: clgrtle %r15, %r1 +; nextln: agfi %r15, -4000000 +; nextln: agfi %r15, 4000000 +; nextln: br %r14 + +function %limit_preamble_huge_offset(i64 vmctx) { + gv0 = vmctx + gv1 = load.i64 notrap aligned gv0+1000000 + stack_limit = gv1 + ss0 = explicit_slot 20 +block0(v0: i64): + return +} + +; check: lgfi %r1, 1000000 ; lg %r1, 0(%r1,%r2) +; nextln: la %r1, 24(%r1) +; nextln: clgrtle %r15, %r1 +; nextln: aghi %r15, -24 +; nextln: aghi %r15, 24 +; nextln: br %r14 diff --git a/cranelift/filetests/filetests/isa/s390x/stack.clif b/cranelift/filetests/filetests/isa/s390x/stack.clif new file mode 100644 index 0000000000..1ac80b9fd0 --- /dev/null +++ b/cranelift/filetests/filetests/isa/s390x/stack.clif @@ -0,0 +1,93 @@ +test compile +target s390x + +; FIXME: Should allocate register save area. + +function %stack_addr_small() -> i64 { +ss0 = explicit_slot 8 + +block0: + v0 = stack_addr.i64 ss0 + return v0 +} + +; check: aghi %r15, -8 +; nextln: la %r2, 0(%r15) +; nextln: aghi %r15, 8 +; nextln: br %r14 + +function %stack_addr_big() -> i64 { +ss0 = explicit_slot 100000 +ss1 = explicit_slot 8 + +block0: + v0 = stack_addr.i64 ss0 + return v0 +} + +; check: agfi %r15, -100008 +; nextln: la %r2, 0(%r15) +; nextln: agfi %r15, 100008 +; nextln: br %r14 + +; FIXME: don't use stack_addr legalization for stack_load and stack_store + +function %stack_load_small() -> i64 { +ss0 = explicit_slot 8 + +block0: + v0 = stack_load.i64 ss0 + return v0 +} + +; check: aghi %r15, -8 +; nextln: la %r2, 0(%r15) +; nextln: lg %r2, 0(%r2) +; nextln: aghi %r15, 8 +; nextln: br %r14 + +function %stack_load_big() -> i64 { +ss0 = explicit_slot 100000 +ss1 = explicit_slot 8 + +block0: + v0 = stack_load.i64 ss0 + return v0 +} + +; check: agfi %r15, -100008 +; nextln: la %r2, 0(%r15) +; nextln: lg %r2, 0(%r2) +; nextln: agfi %r15, 100008 +; nextln: br %r14 + + +function %stack_store_small(i64) { +ss0 = explicit_slot 8 + +block0(v0: i64): + stack_store.i64 v0, ss0 + return +} + +; check: aghi %r15, -8 +; nextln: la %r3, 0(%r15) +; nextln: stg %r2, 0(%r3) +; nextln: aghi %r15, 8 +; nextln: br %r14 + +function %stack_store_big(i64) { +ss0 = explicit_slot 100000 +ss1 = explicit_slot 8 + +block0(v0: i64): + stack_store.i64 v0, ss0 + return +} + +; check: agfi %r15, -100008 +; nextln: la %r3, 0(%r15) +; nextln: stg %r2, 0(%r3) +; nextln: agfi %r15, 100008 +; nextln: br %r14 + diff --git a/cranelift/filetests/filetests/isa/s390x/store-little.clif b/cranelift/filetests/filetests/isa/s390x/store-little.clif new file mode 100644 index 0000000000..65c9ffab93 --- /dev/null +++ b/cranelift/filetests/filetests/isa/s390x/store-little.clif @@ -0,0 +1,281 @@ +test compile +target s390x + +function %store_i64(i64, i64) { +block0(v0: i64, v1: i64): + store.i64 little v0, v1 + return +} + +; check: strvg %r2, 0(%r3) +; nextln: br %r14 + +function %store_i64_sym(i64) { + gv0 = symbol colocated %sym +block0(v0: i64): + v1 = symbol_value.i64 gv0 + store.i64 little v0, v1 + return +} + +; check: larl %r1, %sym + 0 ; strvg %r2, 0(%r1) +; nextln: br %r14 + +function %store_imm_i64(i64) { +block0(v0: i64): + v1 = iconst.i64 12345 + store.i64 little v1, v0 + return +} + +; check: lghi %r3, 12345 +; nextln: strvg %r3, 0(%r2) +; nextln: br %r14 + +function %istore8_i64(i64, i64) { +block0(v0: i64, v1: i64): + istore8.i64 little v0, v1 + return +} + +; check: stc %r2, 0(%r3) +; nextln: br %r14 + +function %istore8_imm_i64(i64) { 
+block0(v0: i64): + v1 = iconst.i64 123 + istore8.i64 little v1, v0 + return +} + +; check: mvi 0(%r2), 123 +; nextln: br %r14 + +function %istore16_i64(i64, i64) { +block0(v0: i64, v1: i64): + istore16.i64 little v0, v1 + return +} + +; check: strvh %r2, 0(%r3) +; nextln: br %r14 + +function %istore16_i64_sym(i64) { + gv0 = symbol colocated %sym +block0(v0: i64): + v1 = symbol_value.i64 gv0 + istore16.i64 little v0, v1 + return +} + +; check: larl %r1, %sym + 0 ; strvh %r2, 0(%r1) +; nextln: br %r14 + +function %istore16_imm_i64(i64) { +block0(v0: i64): + v1 = iconst.i64 12345 + istore16.i64 little v1, v0 + return +} + +; check: mvhhi 0(%r2), 14640 +; nextln: br %r14 + +function %istore32_i64(i64, i64) { +block0(v0: i64, v1: i64): + istore32.i64 little v0, v1 + return +} + +; check: strv %r2, 0(%r3) +; nextln: br %r14 + +function %istore32_i64_sym(i64) { + gv0 = symbol colocated %sym +block0(v0: i64): + v1 = symbol_value.i64 gv0 + istore32.i64 little v0, v1 + return +} + +; check: larl %r1, %sym + 0 ; strv %r2, 0(%r1) +; nextln: br %r14 + +function %istore32_imm_i64(i64) { +block0(v0: i64): + v1 = iconst.i64 12345 + istore32.i64 little v1, v0 + return +} + +; check: lghi %r3, 12345 +; nextln: strv %r3, 0(%r2) +; nextln: br %r14 + +function %store_i32(i32, i64) { +block0(v0: i32, v1: i64): + store.i32 little v0, v1 + return +} + +; check: strv %r2, 0(%r3) +; nextln: br %r14 + +function %store_i32_sym(i32) { + gv0 = symbol colocated %sym +block0(v0: i32): + v1 = symbol_value.i64 gv0 + store.i32 little v0, v1 + return +} + +; check: larl %r1, %sym + 0 ; strv %r2, 0(%r1) +; nextln: br %r14 + +function %store_imm_i32(i64) { +block0(v0: i64): + v1 = iconst.i32 12345 + store.i32 little v1, v0 + return +} + +; check: lhi %r3, 12345 +; nextln: strv %r3, 0(%r2) +; nextln: br %r14 + +function %istore8_i32(i32, i64) { +block0(v0: i32, v1: i64): + istore8.i32 little v0, v1 + return +} + +; check: stc %r2, 0(%r3) +; nextln: br %r14 + +function %istore8_imm_i32(i64) { +block0(v0: i64): + v1 = iconst.i32 123 + istore8.i32 little v1, v0 + return +} + +; check: mvi 0(%r2), 123 +; nextln: br %r14 + +function %istore16_i32(i32, i64) { +block0(v0: i32, v1: i64): + istore16.i32 little v0, v1 + return +} + +; check: strvh %r2, 0(%r3) +; nextln: br %r14 + +function %istore16_i32_sym(i32) { + gv0 = symbol colocated %sym +block0(v0: i32): + v1 = symbol_value.i64 gv0 + istore16.i32 little v0, v1 + return +} + +; check: larl %r1, %sym + 0 ; strvh %r2, 0(%r1) +; nextln: br %r14 + +function %istore16_imm_i32(i64) { +block0(v0: i64): + v1 = iconst.i32 12345 + istore16.i32 little v1, v0 + return +} + +; check: mvhhi 0(%r2), 14640 +; nextln: br %r14 + +function %store_i16(i16, i64) { +block0(v0: i16, v1: i64): + store.i16 little v0, v1 + return +} + +; check: strvh %r2, 0(%r3) +; nextln: br %r14 + +function %store_i16_sym(i16) { + gv0 = symbol colocated %sym +block0(v0: i16): + v1 = symbol_value.i64 gv0 + store.i16 little v0, v1 + return +} + +; check: larl %r1, %sym + 0 ; strvh %r2, 0(%r1) +; nextln: br %r14 + +function %store_imm_i16(i64) { +block0(v0: i64): + v1 = iconst.i16 12345 + store.i16 little v1, v0 + return +} + +; check: mvhhi 0(%r2), 14640 +; nextln: br %r14 + +function %istore8_i16(i16, i64) { +block0(v0: i16, v1: i64): + istore8.i16 little v0, v1 + return +} + +; check: stc %r2, 0(%r3) +; nextln: br %r14 + +function %istore8_imm_i16(i64) { +block0(v0: i64): + v1 = iconst.i16 123 + istore8.i16 little v1, v0 + return +} + +; check: mvi 0(%r2), 123 +; nextln: br %r14 + +function %store_i8(i8, i64) { 
+block0(v0: i8, v1: i64):
+  store.i8 little v0, v1
+  return
+}
+
+; check: stc %r2, 0(%r3)
+; nextln: br %r14
+
+function %store_i8_off(i8, i64) {
+block0(v0: i8, v1: i64):
+  store.i8 little v0, v1+4096
+  return
+}
+
+; check: stcy %r2, 4096(%r3)
+; nextln: br %r14
+
+function %store_imm_i8(i64) {
+block0(v0: i64):
+  v1 = iconst.i8 123
+  store.i8 little v1, v0
+  return
+}
+
+; check: mvi 0(%r2), 123
+; nextln: br %r14
+
+function %store_imm_i8_off(i64) {
+block0(v0: i64):
+  v1 = iconst.i8 123
+  store.i8 little v1, v0+4096
+  return
+}
+
+; check: mviy 4096(%r2), 123
+; nextln: br %r14
+
diff --git a/cranelift/filetests/filetests/isa/s390x/store.clif b/cranelift/filetests/filetests/isa/s390x/store.clif
new file mode 100644
index 0000000000..c7f896ae29
--- /dev/null
+++ b/cranelift/filetests/filetests/isa/s390x/store.clif
@@ -0,0 +1,296 @@
+test compile
+target s390x
+
+function %store_i64(i64, i64) {
+block0(v0: i64, v1: i64):
+  store.i64 v0, v1
+  return
+}
+
+; check: stg %r2, 0(%r3)
+; nextln: br %r14
+
+function %store_i64_sym(i64) {
+  gv0 = symbol colocated %sym
+block0(v0: i64):
+  v1 = symbol_value.i64 gv0
+  store.i64 v0, v1
+  return
+}
+
+; check: stgrl %r2, %sym + 0
+; nextln: br %r14
+
+function %store_imm_i64(i64) {
+block0(v0: i64):
+  v1 = iconst.i64 12345
+  store.i64 v1, v0
+  return
+}
+
+; check: mvghi 0(%r2), 12345
+; nextln: br %r14
+
+function %istore8_i64(i64, i64) {
+block0(v0: i64, v1: i64):
+  istore8.i64 v0, v1
+  return
+}
+
+; check: stc %r2, 0(%r3)
+; nextln: br %r14
+
+function %istore8_imm_i64(i64) {
+block0(v0: i64):
+  v1 = iconst.i64 123
+  istore8.i64 v1, v0
+  return
+}
+
+; check: mvi 0(%r2), 123
+; nextln: br %r14
+
+function %istore16_i64(i64, i64) {
+block0(v0: i64, v1: i64):
+  istore16.i64 v0, v1
+  return
+}
+
+; check: sth %r2, 0(%r3)
+; nextln: br %r14
+
+function %istore16_i64_sym(i64) {
+  gv0 = symbol colocated %sym
+block0(v0: i64):
+  v1 = symbol_value.i64 gv0
+  istore16.i64 v0, v1
+  return
+}
+
+; check: sthrl %r2, %sym + 0
+; nextln: br %r14
+
+function %istore16_imm_i64(i64) {
+block0(v0: i64):
+  v1 = iconst.i64 12345
+  istore16.i64 v1, v0
+  return
+}
+
+; check: mvhhi 0(%r2), 12345
+; nextln: br %r14
+
+function %istore32_i64(i64, i64) {
+block0(v0: i64, v1: i64):
+  istore32.i64 v0, v1
+  return
+}
+
+; check: st %r2, 0(%r3)
+; nextln: br %r14
+
+function %istore32_i64_sym(i64) {
+  gv0 = symbol colocated %sym
+block0(v0: i64):
+  v1 = symbol_value.i64 gv0
+  istore32.i64 v0, v1
+  return
+}
+
+; check: strl %r2, %sym + 0
+; nextln: br %r14
+
+function %istore32_imm_i64(i64) {
+block0(v0: i64):
+  v1 = iconst.i64 12345
+  istore32.i64 v1, v0
+  return
+}
+
+; check: mvhi 0(%r2), 12345
+; nextln: br %r14
+
+function %store_i32(i32, i64) {
+block0(v0: i32, v1: i64):
+  store.i32 v0, v1
+  return
+}
+
+; check: st %r2, 0(%r3)
+; nextln: br %r14
+
+function %store_i32_sym(i32) {
+  gv0 = symbol colocated %sym
+block0(v0: i32):
+  v1 = symbol_value.i64 gv0
+  store.i32 v0, v1
+  return
+}
+
+; check: strl %r2, %sym + 0
+; nextln: br %r14
+
+function %store_i32_off(i32, i64) {
+block0(v0: i32, v1: i64):
+  store.i32 v0, v1+4096
+  return
+}
+
+; check: sty %r2, 4096(%r3)
+; nextln: br %r14
+
+function %store_imm_i32(i64) {
+block0(v0: i64):
+  v1 = iconst.i32 12345
+  store.i32 v1, v0
+  return
+}
+
+; check: mvhi 0(%r2), 12345
+; nextln: br %r14
+
+function %istore8_i32(i32, i64) {
+block0(v0: i32, v1: i64):
+  istore8.i32 v0, v1
+  return
+}
+
+; check: stc %r2, 0(%r3)
+; nextln: br %r14
+
+function %istore8_imm_i32(i64) {
+block0(v0: i64):
+  v1 = iconst.i32 123
+  istore8.i32 v1, v0
+  return
+}
+
+; check: mvi 0(%r2), 123
+; nextln: br %r14
+
+function %istore16_i32(i32, i64) {
+block0(v0: i32, v1: i64):
+  istore16.i32 v0, v1
+  return
+}
+
+; check: sth %r2, 0(%r3)
+; nextln: br %r14
+
+function %istore16_i32_sym(i32) {
+  gv0 = symbol colocated %sym
+block0(v0: i32):
+  v1 = symbol_value.i64 gv0
+  istore16.i32 v0, v1
+  return
+}
+
+; check: sthrl %r2, %sym + 0
+; nextln: br %r14
+
+function %istore16_imm_i32(i64) {
+block0(v0: i64):
+  v1 = iconst.i32 12345
+  istore16.i32 v1, v0
+  return
+}
+
+; check: mvhhi 0(%r2), 12345
+; nextln: br %r14
+
+function %store_i16(i16, i64) {
+block0(v0: i16, v1: i64):
+  store.i16 v0, v1
+  return
+}
+
+; check: sth %r2, 0(%r3)
+; nextln: br %r14
+
+function %store_i16_sym(i16) {
+  gv0 = symbol colocated %sym
+block0(v0: i16):
+  v1 = symbol_value.i64 gv0
+  store.i16 v0, v1
+  return
+}
+
+; check: sthrl %r2, %sym + 0
+; nextln: br %r14
+
+function %store_i16_off(i16, i64) {
+block0(v0: i16, v1: i64):
+  store.i16 v0, v1+4096
+  return
+}
+
+; check: sthy %r2, 4096(%r3)
+; nextln: br %r14
+
+function %store_imm_i16(i64) {
+block0(v0: i64):
+  v1 = iconst.i16 12345
+  store.i16 v1, v0
+  return
+}
+
+; check: mvhhi 0(%r2), 12345
+; nextln: br %r14
+
+function %istore8_i16(i16, i64) {
+block0(v0: i16, v1: i64):
+  istore8.i16 v0, v1
+  return
+}
+
+; check: stc %r2, 0(%r3)
+; nextln: br %r14
+
+function %istore8_imm_i16(i64) {
+block0(v0: i64):
+  v1 = iconst.i16 123
+  istore8.i16 v1, v0
+  return
+}
+
+; check: mvi 0(%r2), 123
+; nextln: br %r14
+
+function %store_i8(i8, i64) {
+block0(v0: i8, v1: i64):
+  store.i8 v0, v1
+  return
+}
+
+; check: stc %r2, 0(%r3)
+; nextln: br %r14
+
+function %store_i8_off(i8, i64) {
+block0(v0: i8, v1: i64):
+  store.i8 v0, v1+4096
+  return
+}
+
+; check: stcy %r2, 4096(%r3)
+; nextln: br %r14
+
+function %store_imm_i8(i64) {
+block0(v0: i64):
+  v1 = iconst.i8 123
+  store.i8 v1, v0
+  return
+}
+
+; check: mvi 0(%r2), 123
+; nextln: br %r14
+
+function %store_imm_i8_off(i64) {
+block0(v0: i64):
+  v1 = iconst.i8 123
+  store.i8 v1, v0+4096
+  return
+}
+
+; check: mviy 4096(%r2), 123
+; nextln: br %r14
+
diff --git a/cranelift/filetests/filetests/isa/s390x/symbols.clif b/cranelift/filetests/filetests/isa/s390x/symbols.clif
new file mode 100644
index 0000000000..98d0cc8e30
--- /dev/null
+++ b/cranelift/filetests/filetests/isa/s390x/symbols.clif
@@ -0,0 +1,54 @@
+test compile
+target s390x
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;; SYMBOL_VALUE
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+function %symbol_value() -> i64 {
+  gv0 = symbol %my_global
+
+block0:
+  v0 = symbol_value.i64 gv0
+  return v0
+}
+
+; check: bras %r1, 12 ; data %my_global + 0 ; lg %r2, 0(%r1)
+; nextln: br %r14
+
+function %symbol_value_colocated() -> i64 {
+  gv0 = symbol colocated %my_global_colo
+
+block0:
+  v0 = symbol_value.i64 gv0
+  return v0
+}
+
+; check: larl %r2, %my_global_colo + 0
+; nextln: br %r14
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;; FUNC_ADDR
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+function %func_addr() -> i64 {
+  fn0 = %my_func(i64) -> i64
+
+block0:
+  v0 = func_addr.i64 fn0
+  return v0
+}
+
+; check: bras %r1, 12 ; data %my_func + 0 ; lg %r2, 0(%r1)
+; nextln: br %r14
+
+function %func_addr_colocated() -> i64 {
+  fn0 = colocated %my_func_colo(i64) -> i64
+
+block0:
+  v0 = func_addr.i64 fn0
+  return v0
+}
+
+; check: larl %r2, %my_func_colo + 0
+; nextln: br %r14
diff --git a/cranelift/filetests/filetests/isa/s390x/traps.clif b/cranelift/filetests/filetests/isa/s390x/traps.clif
new file mode 100644
index 0000000000..a6b70cecea
--- /dev/null
+++ b/cranelift/filetests/filetests/isa/s390x/traps.clif
@@ -0,0 +1,91 @@
+test compile
+target s390x
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;; TRAP/RESUMABLE_TRAP
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+function %trap() {
+block0:
+  trap user0
+}
+
+; check: trap
+
+function %resumable_trap() {
+block0:
+  resumable_trap user0
+  return
+}
+
+; check: trap
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;; TRAPZ
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+function %trapz(i64) {
+block0(v0: i64):
+  v1 = iconst.i64 42
+  v2 = icmp eq v0, v1
+  trapz v2, user0
+  return
+}
+
+; FIXME: Does not use TrapIf internally as trapz is expanded.
+; check: Block 0
+; check: clgfi %r2, 42
+; nextln: jge label1 ; jg label2
+; check: Block 1:
+; check: br %r14
+; check: Block 2:
+; check: trap
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;; TRAPNZ/RESUMABLE_TRAPNZ
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+function %trapnz(i64) {
+block0(v0: i64):
+  v1 = iconst.i64 42
+  v2 = icmp eq v0, v1
+  trapnz v2, user0
+  return
+}
+
+; FIXME: Does not use TrapIf internally as trapnz is expanded.
+; check: Block 0
+; check: clgfi %r2, 42
+; nextln: jgne label1 ; jg label2
+; check: Block 1:
+; check: br %r14
+; check: Block 2:
+; check: trap
+
+function %resumable_trapnz(i64) {
+block0(v0: i64):
+  v1 = iconst.i64 42
+  v2 = icmp eq v0, v1
+  resumable_trapnz v2, user0
+  return
+}
+
+; FIXME: Does not use TrapIf internally as resumable_trapnz is expanded.
+; check: Block 0
+; check: clgfi %r2, 42
+; nextln: jgne label1 ; jg label2
+; check: Block 1:
+; check: br %r14
+; check: Block 2:
+; check: trap
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;; DEBUGTRAP
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+function %debugtrap() {
+block0:
+  debugtrap
+  return
+}
+
+; check: debugtrap
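+
+; The trapz/trapnz tests above go through the shared "expand" legalization,
+; which is why no fused compare-and-trap (TrapIf) appears: the trap is split
+; into a compare, a conditional branch, and a dedicated trap block before
+; lowering. As an illustrative sketch (a hypothetical test, not part of the
+; backend's requirements), writing that expansion out by hand should compile
+; to the same code as %trapnz:
+
+function %trapnz_expanded(i64) {
+block0(v0: i64):
+  v1 = iconst.i64 42
+  v2 = icmp eq v0, v1
+  brnz v2, block2
+  jump block1
+
+block1:
+  return
+
+block2:
+  trap user0
+}
+
+; check: clgfi %r2, 42
+; check: trap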