//! Lowering rules for S390x.

use crate::ir::condcodes::{FloatCC, IntCC};
use crate::ir::Inst as IRInst;
use crate::ir::{types, Endianness, InstructionData, MemFlags, Opcode, TrapCode, Type};
use crate::isa::s390x::abi::*;
use crate::isa::s390x::inst::*;
use crate::isa::s390x::settings as s390x_settings;
use crate::isa::s390x::S390xBackend;
use crate::machinst::lower::*;
use crate::machinst::*;
use crate::settings::Flags;
use crate::CodegenResult;
use alloc::boxed::Box;
use alloc::vec::Vec;
use core::convert::TryFrom;
use regalloc::{Reg, Writable};
use smallvec::SmallVec;

//=============================================================================
// Helpers for instruction lowering.

fn ty_is_int(ty: Type) -> bool {
    match ty {
        types::B1 | types::B8 | types::B16 | types::B32 | types::B64 => true,
        types::I8 | types::I16 | types::I32 | types::I64 | types::R64 => true,
        types::F32 | types::F64 => false,
        types::IFLAGS | types::FFLAGS => panic!("Unexpected flags type"),
        _ => panic!("ty_is_int() on unknown type: {:?}", ty),
    }
}

fn ty_is_float(ty: Type) -> bool {
    !ty_is_int(ty)
}

fn is_valid_atomic_transaction_ty(ty: Type) -> bool {
    match ty {
        types::I8 | types::I16 | types::I32 | types::I64 => true,
        _ => false,
    }
}

fn choose_32_64<T>(ty: Type, op32: T, op64: T) -> T {
    let bits = ty_bits(ty);
    if bits <= 32 {
        op32
    } else if bits == 64 {
        op64
    } else {
        panic!("choose_32_64 on > 64 bits!")
    }
}

//============================================================================
// Lowering: convert instruction inputs to forms that we can use.

/// Lower an instruction input to a 64-bit constant, if possible.
fn input_matches_const<C: LowerCtx<I = Inst>>(ctx: &mut C, input: InsnInput) -> Option<u64> {
    let input = ctx.get_input_as_source_or_const(input.insn, input.input);
    input.constant
}

/// Lower an instruction input to a 64-bit signed constant, if possible.
fn input_matches_sconst<C: LowerCtx<I = Inst>>(ctx: &mut C, input: InsnInput) -> Option<i64> {
    if let Some(imm) = input_matches_const(ctx, input) {
        let ty = ctx.input_ty(input.insn, input.input);
        Some(sign_extend_to_u64(imm, ty_bits(ty) as u8) as i64)
    } else {
        None
    }
}

/// Return false if the instruction input cannot have the value `imm`, true otherwise.
fn input_maybe_imm<C: LowerCtx<I = Inst>>(ctx: &mut C, input: InsnInput, imm: u64) -> bool {
    if let Some(c) = input_matches_const(ctx, input) {
        let ty = ctx.input_ty(input.insn, input.input);
        let from_bits = ty_bits(ty) as u8;
        let mask = if from_bits < 64 {
            (1u64 << ty_bits(ty)) - 1
        } else {
            0xffff_ffff_ffff_ffff
        };
        c & mask == imm & mask
    } else {
        true
    }
}

/// Lower an instruction input to a 16-bit signed constant, if possible.
fn input_matches_simm16<C: LowerCtx<I = Inst>>(ctx: &mut C, input: InsnInput) -> Option<i16> {
    if let Some(imm_value) = input_matches_sconst(ctx, input) {
        if let Ok(imm) = i16::try_from(imm_value) {
            return Some(imm);
        }
    }
    None
}

/// Lower an instruction input to a 32-bit signed constant, if possible.
fn input_matches_simm32<C: LowerCtx<I = Inst>>(ctx: &mut C, input: InsnInput) -> Option<i32> {
    if let Some(imm_value) = input_matches_sconst(ctx, input) {
        if let Ok(imm) = i32::try_from(imm_value) {
            return Some(imm);
        }
    }
    None
}

/// Lower an instruction input to a 32-bit unsigned constant, if possible.
fn input_matches_uimm32<C: LowerCtx<I = Inst>>(ctx: &mut C, input: InsnInput) -> Option<u32> {
    if let Some(imm_value) = input_matches_const(ctx, input) {
        if let Ok(imm) = u32::try_from(imm_value) {
            return Some(imm);
        }
    }
    None
}
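// Illustrative sketch (added for clarity, not part of the original lowering logic): these
// matchers are typically tried in decreasing order of specificity inside a lowering arm,
// mirroring the `Iadd` arm further below. `alu_op`, `rd`, `rn`, `ty` and `inputs` are assumed
// to be set up as in those arms:
//
//     if let Some(imm) = input_matches_simm16(ctx, inputs[1]) {
//         ctx.emit(Inst::AluRRSImm16 { alu_op, rd, rn, imm });
//     } else if let Some(mem) = input_matches_mem(ctx, inputs[1]) {
//         ctx.emit(Inst::gen_move(rd, rn, ty));
//         ctx.emit(Inst::AluRX { alu_op, rd, mem });
//     } // ...finally falling back to a register-register form.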
/// Lower a negated instruction input to a 16-bit signed constant, if possible.
fn negated_input_matches_simm16<C: LowerCtx<I = Inst>>(
    ctx: &mut C,
    input: InsnInput,
) -> Option<i16> {
    if let Some(imm_value) = input_matches_sconst(ctx, input) {
        if let Ok(imm) = i16::try_from(-imm_value) {
            return Some(imm);
        }
    }
    None
}

/// Lower a negated instruction input to a 32-bit signed constant, if possible.
fn negated_input_matches_simm32<C: LowerCtx<I = Inst>>(
    ctx: &mut C,
    input: InsnInput,
) -> Option<i32> {
    if let Some(imm_value) = input_matches_sconst(ctx, input) {
        if let Ok(imm) = i32::try_from(-imm_value) {
            return Some(imm);
        }
    }
    None
}

/// Lower an instruction input to a 16-bit shifted constant, if possible.
fn input_matches_uimm16shifted<C: LowerCtx<I = Inst>>(
    ctx: &mut C,
    input: InsnInput,
) -> Option<UImm16Shifted> {
    if let Some(imm_value) = input_matches_const(ctx, input) {
        return UImm16Shifted::maybe_from_u64(imm_value);
    }
    None
}

/// Lower an instruction input to a 32-bit shifted constant, if possible.
fn input_matches_uimm32shifted<C: LowerCtx<I = Inst>>(
    ctx: &mut C,
    input: InsnInput,
) -> Option<UImm32Shifted> {
    if let Some(imm_value) = input_matches_const(ctx, input) {
        return UImm32Shifted::maybe_from_u64(imm_value);
    }
    None
}

/// Lower an instruction input to a 16-bit inverted shifted constant, if possible.
fn input_matches_uimm16shifted_inv<C: LowerCtx<I = Inst>>(
    ctx: &mut C,
    input: InsnInput,
) -> Option<UImm16Shifted> {
    if let Some(imm_value) = input_matches_const(ctx, input) {
        if let Some(imm) = UImm16Shifted::maybe_from_u64(!imm_value) {
            return Some(imm.negate_bits());
        }
    }
    None
}

/// Lower an instruction input to a 32-bit inverted shifted constant, if possible.
fn input_matches_uimm32shifted_inv<C: LowerCtx<I = Inst>>(
    ctx: &mut C,
    input: InsnInput,
) -> Option<UImm32Shifted> {
    if let Some(imm_value) = input_matches_const(ctx, input) {
        if let Some(imm) = UImm32Shifted::maybe_from_u64(!imm_value) {
            return Some(imm.negate_bits());
        }
    }
    None
}
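// Illustrative note (an assumption about the immediate forms, not taken from the original
// comments): the `*shifted` matchers succeed only for constants that fit one of the machine's
// 16- or 32-bit immediate fields placed at a halfword/word position of the register. The `_inv`
// variants instead match constants whose bitwise complement has that shape; they return the
// complemented immediate (`negate_bits()`), which is the form the AND-with-immediate arms below
// expect when clearing a single halfword or word of a value.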
/// Checks for an instance of `op` feeding the given input.
fn input_matches_insn<C: LowerCtx<I = Inst>>(
    c: &mut C,
    input: InsnInput,
    op: Opcode,
) -> Option<IRInst> {
    let inputs = c.get_input_as_source_or_const(input.insn, input.input);
    if let Some((src_inst, _)) = inputs.inst {
        let data = c.data(src_inst);
        if data.opcode() == op {
            return Some(src_inst);
        }
    }
    None
}

/// Checks for an instance of `op` feeding the given input, possibly via a conversion `conv` (e.g.,
/// Bint or a bitcast).
fn input_matches_insn_via_conv<C: LowerCtx<I = Inst>>(
    c: &mut C,
    input: InsnInput,
    op: Opcode,
    conv: Opcode,
) -> Option<IRInst> {
    let inputs = c.get_input_as_source_or_const(input.insn, input.input);
    if let Some((src_inst, _)) = inputs.inst {
        let data = c.data(src_inst);
        if data.opcode() == op {
            return Some(src_inst);
        }
        if data.opcode() == conv {
            let inputs = c.get_input_as_source_or_const(src_inst, 0);
            if let Some((src_inst, _)) = inputs.inst {
                let data = c.data(src_inst);
                if data.opcode() == op {
                    return Some(src_inst);
                }
            }
        }
    }
    None
}

fn input_matches_load_insn<C: LowerCtx<I = Inst>>(
    ctx: &mut C,
    input: InsnInput,
    op: Opcode,
) -> Option<MemArg> {
    if let Some(insn) = input_matches_insn(ctx, input, op) {
        let inputs: SmallVec<[InsnInput; 4]> = (0..ctx.num_inputs(insn))
            .map(|i| InsnInput { insn, input: i })
            .collect();
        let off = ctx.data(insn).load_store_offset().unwrap();
        let flags = ctx.memflags(insn).unwrap();
        let endianness = flags.endianness(Endianness::Big);
        if endianness == Endianness::Big {
            let mem = lower_address(ctx, &inputs[..], off, flags);
            ctx.sink_inst(insn);
            return Some(mem);
        }
    }
    None
}

fn input_matches_mem<C: LowerCtx<I = Inst>>(ctx: &mut C, input: InsnInput) -> Option<MemArg> {
    if ty_bits(ctx.input_ty(input.insn, input.input)) >= 32 {
        return input_matches_load_insn(ctx, input, Opcode::Load);
    }
    None
}

fn input_matches_sext16_mem<C: LowerCtx<I = Inst>>(
    ctx: &mut C,
    input: InsnInput,
) -> Option<MemArg> {
    if ty_bits(ctx.input_ty(input.insn, input.input)) == 16 {
        return input_matches_load_insn(ctx, input, Opcode::Load);
    }
    if ty_bits(ctx.input_ty(input.insn, input.input)) >= 32 {
        return input_matches_load_insn(ctx, input, Opcode::Sload16);
    }
    None
}

fn input_matches_sext32_mem<C: LowerCtx<I = Inst>>(
    ctx: &mut C,
    input: InsnInput,
) -> Option<MemArg> {
    if ty_bits(ctx.input_ty(input.insn, input.input)) > 32 {
        return input_matches_load_insn(ctx, input, Opcode::Sload32);
    }
    None
}

fn input_matches_sext32_reg<C: LowerCtx<I = Inst>>(ctx: &mut C, input: InsnInput) -> Option<Reg> {
    if let Some(insn) = input_matches_insn(ctx, input, Opcode::Sextend) {
        if ty_bits(ctx.input_ty(insn, 0)) == 32 {
            let reg = put_input_in_reg(ctx, InsnInput { insn, input: 0 }, NarrowValueMode::None);
            return Some(reg);
        }
    }
    None
}

fn input_matches_uext32_reg<C: LowerCtx<I = Inst>>(ctx: &mut C, input: InsnInput) -> Option<Reg> {
    if let Some(insn) = input_matches_insn(ctx, input, Opcode::Uextend) {
        if ty_bits(ctx.input_ty(insn, 0)) == 32 {
            let reg = put_input_in_reg(ctx, InsnInput { insn, input: 0 }, NarrowValueMode::None);
            return Some(reg);
        }
    }
    None
}

fn input_matches_uext16_mem<C: LowerCtx<I = Inst>>(
    ctx: &mut C,
    input: InsnInput,
) -> Option<MemArg> {
    if ty_bits(ctx.input_ty(input.insn, input.input)) == 16 {
        return input_matches_load_insn(ctx, input, Opcode::Load);
    }
    if ty_bits(ctx.input_ty(input.insn, input.input)) >= 32 {
        return input_matches_load_insn(ctx, input, Opcode::Uload16);
    }
    None
}

fn input_matches_uext32_mem<C: LowerCtx<I = Inst>>(
    ctx: &mut C,
    input: InsnInput,
) -> Option<MemArg> {
    if ty_bits(ctx.input_ty(input.insn, input.input)) > 32 {
        return input_matches_load_insn(ctx, input, Opcode::Uload32);
    }
    None
}
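// Illustrative note (added for clarity; the CLIF snippet is hypothetical): the `*_mem` matchers
// above let compares and ALU ops fuse a feeding load into a memory operand, e.g.
//
//     v2 = sload16.i64 v1
//     v3 = icmp sgt v0, v2
//
// can be lowered to a single compare-from-memory (CmpS64Ext16) instead of a separate load plus
// register compare, provided the load is big-endian and can be sunk to its single use.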
//============================================================================
// Lowering: force instruction input into a register

/// How to handle narrow values loaded into registers; see note on `narrow_mode`
/// parameter to `put_input_in_*` below.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
enum NarrowValueMode {
    None,
    /// Zero-extend to 32 bits if original is < 32 bits.
    ZeroExtend32,
    /// Sign-extend to 32 bits if original is < 32 bits.
    SignExtend32,
    /// Zero-extend to 64 bits if original is < 64 bits.
    ZeroExtend64,
    /// Sign-extend to 64 bits if original is < 64 bits.
    SignExtend64,
}

fn extend_memory_to_reg<C: LowerCtx<I = Inst>>(
    ctx: &mut C,
    mem: MemArg,
    from_ty: Type,
    to_ty: Type,
    signed: bool,
) -> Reg {
    let rd = ctx.alloc_tmp(to_ty).only_reg().unwrap();
    ctx.emit(match (signed, ty_bits(to_ty), ty_bits(from_ty)) {
        (false, 32, 8) => Inst::Load32ZExt8 { rd, mem },
        (false, 32, 16) => Inst::Load32ZExt16 { rd, mem },
        (true, 32, 8) => Inst::Load32SExt8 { rd, mem },
        (true, 32, 16) => Inst::Load32SExt16 { rd, mem },
        (false, 64, 8) => Inst::Load64ZExt8 { rd, mem },
        (false, 64, 16) => Inst::Load64ZExt16 { rd, mem },
        (false, 64, 32) => Inst::Load64ZExt32 { rd, mem },
        (true, 64, 8) => Inst::Load64SExt8 { rd, mem },
        (true, 64, 16) => Inst::Load64SExt16 { rd, mem },
        (true, 64, 32) => Inst::Load64SExt32 { rd, mem },
        _ => panic!("Unsupported size in load"),
    });
    rd.to_reg()
}

/// Sign-extend the low `from_bits` bits of `value` to a full u64.
fn sign_extend_to_u64(value: u64, from_bits: u8) -> u64 {
    assert!(from_bits <= 64);
    if from_bits >= 64 {
        value
    } else {
        (((value << (64 - from_bits)) as i64) >> (64 - from_bits)) as u64
    }
}

/// Zero-extend the low `from_bits` bits of `value` to a full u64.
fn zero_extend_to_u64(value: u64, from_bits: u8) -> u64 {
    assert!(from_bits <= 64);
    if from_bits >= 64 {
        value
    } else {
        value & ((1u64 << from_bits) - 1)
    }
}
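// Illustrative sketch (added for clarity, not part of the original backend): the two helpers
// above behave like ordinary bit-level extension. The test module name is arbitrary.
#[cfg(test)]
mod extend_helper_examples {
    use super::{sign_extend_to_u64, zero_extend_to_u64};

    #[test]
    fn extend_examples() {
        // 0x80 viewed as an 8-bit value is -128; sign-extending fills the upper bits with ones.
        assert_eq!(sign_extend_to_u64(0x80, 8), 0xffff_ffff_ffff_ff80);
        // Positive values are unchanged by sign extension.
        assert_eq!(sign_extend_to_u64(0x7f, 8), 0x7f);
        // Zero extension simply masks to the low `from_bits` bits.
        assert_eq!(zero_extend_to_u64(0xffff_ff80, 8), 0x80);
    }
}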
/// Lower an instruction input to a reg.
///
/// The given register will be extended appropriately, according to
/// `narrow_mode` and the input's type.
fn put_input_in_reg<C: LowerCtx<I = Inst>>(
    ctx: &mut C,
    input: InsnInput,
    narrow_mode: NarrowValueMode,
) -> Reg {
    let signed = match narrow_mode {
        NarrowValueMode::SignExtend32 | NarrowValueMode::SignExtend64 => true,
        NarrowValueMode::ZeroExtend32 | NarrowValueMode::ZeroExtend64 => false,
        _ => false,
    };
    let ty = ctx.input_ty(input.insn, input.input);
    let from_bits = ty_bits(ty) as u8;
    let ext_ty = match narrow_mode {
        NarrowValueMode::None => ty,
        NarrowValueMode::ZeroExtend32 | NarrowValueMode::SignExtend32 => types::I32,
        NarrowValueMode::ZeroExtend64 | NarrowValueMode::SignExtend64 => types::I64,
    };
    let to_bits = ty_bits(ext_ty) as u8;
    assert!(to_bits >= from_bits);

    if let Some(c) = input_matches_const(ctx, input) {
        let extended = if from_bits == to_bits {
            c
        } else if signed {
            sign_extend_to_u64(c, from_bits)
        } else {
            zero_extend_to_u64(c, from_bits)
        };
        let masked = zero_extend_to_u64(extended, to_bits);

        // Generate constants fresh at each use to minimize long-range register pressure.
        let to_reg = ctx.alloc_tmp(ext_ty).only_reg().unwrap();
        for inst in Inst::gen_constant(ValueRegs::one(to_reg), masked as u128, ext_ty, |ty| {
            ctx.alloc_tmp(ty).only_reg().unwrap()
        })
        .into_iter()
        {
            ctx.emit(inst);
        }
        to_reg.to_reg()
    } else if to_bits == from_bits {
        ctx.put_input_in_regs(input.insn, input.input)
            .only_reg()
            .unwrap()
    } else if let Some(mem) = input_matches_load_insn(ctx, input, Opcode::Load) {
        extend_memory_to_reg(ctx, mem, ty, ext_ty, signed)
    } else {
        let rd = ctx.alloc_tmp(ext_ty).only_reg().unwrap();
        let rn = ctx
            .put_input_in_regs(input.insn, input.input)
            .only_reg()
            .unwrap();
        ctx.emit(Inst::Extend {
            rd,
            rn,
            signed,
            from_bits,
            to_bits,
        });
        rd.to_reg()
    }
}

//============================================================================
// Lowering: addressing mode support. Takes instruction directly, rather
// than an `InsnInput`, to do more introspection.

/// Lower the address of a load or store.
fn lower_address<C: LowerCtx<I = Inst>>(
    ctx: &mut C,
    addends: &[InsnInput],
    offset: i32,
    flags: MemFlags,
) -> MemArg {
    // Handle one reg and offset.
    if addends.len() == 1 {
        if offset == 0 {
            if let Some(add) = input_matches_insn(ctx, addends[0], Opcode::Iadd) {
                debug_assert_eq!(ctx.output_ty(add, 0), types::I64);
                let add_inputs = &[
                    InsnInput {
                        insn: add,
                        input: 0,
                    },
                    InsnInput {
                        insn: add,
                        input: 1,
                    },
                ];
                let ra = put_input_in_reg(ctx, add_inputs[0], NarrowValueMode::None);
                let rb = put_input_in_reg(ctx, add_inputs[1], NarrowValueMode::None);
                return MemArg::reg_plus_reg(ra, rb, flags);
            }
        }
        if let Some(symbol) = input_matches_insn(ctx, addends[0], Opcode::SymbolValue) {
            let (extname, dist, ext_offset) = ctx.symbol_value(symbol).unwrap();
            let ext_offset = ext_offset + i64::from(offset);
            if dist == RelocDistance::Near && (ext_offset & 1) == 0 {
                if let Ok(offset) = i32::try_from(ext_offset) {
                    return MemArg::Symbol {
                        name: Box::new(extname.clone()),
                        offset,
                        flags,
                    };
                }
            }
        }
        let reg = put_input_in_reg(ctx, addends[0], NarrowValueMode::None);
        return MemArg::reg_plus_off(reg, offset as i64, flags);
    }

    // Handle two regs and a zero offset.
    if addends.len() == 2 && offset == 0 {
        let ra = put_input_in_reg(ctx, addends[0], NarrowValueMode::None);
        let rb = put_input_in_reg(ctx, addends[1], NarrowValueMode::None);
        return MemArg::reg_plus_reg(ra, rb, flags);
    }

    // Otherwise, generate add instructions.
    let addr = ctx.alloc_tmp(types::I64).only_reg().unwrap();

    // Get the const into a reg.
    lower_constant_u64(ctx, addr.clone(), offset as u64);

    // Add each addend to the address.
    for addend in addends {
        let reg = put_input_in_reg(ctx, *addend, NarrowValueMode::None);
        ctx.emit(Inst::AluRRR {
            alu_op: ALUOp::Add64,
            rd: addr.clone(),
            rn: addr.to_reg(),
            rm: reg.clone(),
        });
    }

    MemArg::reg(addr.to_reg(), flags)
}

//============================================================================
// Lowering: generating constants.

fn lower_constant_u64<C: LowerCtx<I = Inst>>(ctx: &mut C, rd: Writable<Reg>, value: u64) {
    for inst in Inst::load_constant64(rd, value) {
        ctx.emit(inst);
    }
}

fn lower_constant_u32<C: LowerCtx<I = Inst>>(ctx: &mut C, rd: Writable<Reg>, value: u32) {
    for inst in Inst::load_constant32(rd, value) {
        ctx.emit(inst);
    }
}

fn lower_constant_f32<C: LowerCtx<I = Inst>>(ctx: &mut C, rd: Writable<Reg>, value: f32) {
    ctx.emit(Inst::load_fp_constant32(rd, value));
}

fn lower_constant_f64<C: LowerCtx<I = Inst>>(ctx: &mut C, rd: Writable<Reg>, value: f64) {
    ctx.emit(Inst::load_fp_constant64(rd, value));
}

//============================================================================
// Lowering: miscellaneous helpers.

/// Emit code to invert the value of type `ty` in register `rd`.
fn lower_bnot<C: LowerCtx<I = Inst>>(ctx: &mut C, ty: Type, rd: Writable<Reg>) {
    let alu_op = choose_32_64(ty, ALUOp::Xor32, ALUOp::Xor64);
    ctx.emit(Inst::AluRUImm32Shifted {
        alu_op,
        rd,
        imm: UImm32Shifted::maybe_from_u64(0xffff_ffff).unwrap(),
    });
    if ty_bits(ty) > 32 {
        ctx.emit(Inst::AluRUImm32Shifted {
            alu_op,
            rd,
            imm: UImm32Shifted::maybe_from_u64(0xffff_ffff_0000_0000).unwrap(),
        });
    }
}
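// Illustrative note (an assumption about the encoding rationale, not taken from the original
// comments): there is no single 64-bit XOR-immediate form, so for a 64-bit `ty` the helper
// above flips the value in two halves, roughly:
//
//     rd = rd ^ 0x0000_0000_ffff_ffff   // invert the low 32 bits
//     rd = rd ^ 0xffff_ffff_0000_0000   // invert the high 32 bits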
/// Emit code to bitcast between integer and floating-point values.
fn lower_bitcast<C: LowerCtx<I = Inst>>(
    ctx: &mut C,
    rd: Writable<Reg>,
    output_ty: Type,
    rn: Reg,
    input_ty: Type,
) {
    match (input_ty, output_ty) {
        (types::I64, types::F64) => {
            ctx.emit(Inst::MovToFpr { rd, rn });
        }
        (types::F64, types::I64) => {
            ctx.emit(Inst::MovFromFpr { rd, rn });
        }
        (types::I32, types::F32) => {
            let tmp = ctx.alloc_tmp(types::I64).only_reg().unwrap();
            ctx.emit(Inst::ShiftRR {
                shift_op: ShiftOp::LShL64,
                rd: tmp,
                rn,
                shift_imm: SImm20::maybe_from_i64(32).unwrap(),
                shift_reg: None,
            });
            ctx.emit(Inst::MovToFpr {
                rd,
                rn: tmp.to_reg(),
            });
        }
        (types::F32, types::I32) => {
            let tmp = ctx.alloc_tmp(types::I64).only_reg().unwrap();
            ctx.emit(Inst::MovFromFpr { rd: tmp, rn });
            ctx.emit(Inst::ShiftRR {
                shift_op: ShiftOp::LShR64,
                rd,
                rn: tmp.to_reg(),
                shift_imm: SImm20::maybe_from_i64(32).unwrap(),
                shift_reg: None,
            });
        }
        _ => unreachable!("invalid bitcast from {:?} to {:?}", input_ty, output_ty),
    }
}

//=============================================================================
// Lowering: comparisons

/// Determines whether this condcode interprets inputs as signed or
/// unsigned. See the documentation for the `icmp` instruction in
/// cranelift-codegen/meta/src/shared/instructions.rs for further insights
/// into this.
pub fn condcode_is_signed(cc: IntCC) -> bool {
    match cc {
        IntCC::Equal => false,
        IntCC::NotEqual => false,
        IntCC::SignedGreaterThanOrEqual => true,
        IntCC::SignedGreaterThan => true,
        IntCC::SignedLessThanOrEqual => true,
        IntCC::SignedLessThan => true,
        IntCC::UnsignedGreaterThanOrEqual => false,
        IntCC::UnsignedGreaterThan => false,
        IntCC::UnsignedLessThanOrEqual => false,
        IntCC::UnsignedLessThan => false,
        IntCC::Overflow => true,
        IntCC::NotOverflow => true,
    }
}

fn lower_icmp_to_flags<C: LowerCtx<I = Inst>>(
    ctx: &mut C,
    insn: IRInst,
    is_signed: bool,
    may_sink_memory: bool,
) {
    let ty = ctx.input_ty(insn, 0);
    let bits = ty_bits(ty);
    let narrow_mode = match (bits <= 32, is_signed) {
        (true, true) => NarrowValueMode::SignExtend32,
        (true, false) => NarrowValueMode::ZeroExtend32,
        (false, true) => NarrowValueMode::SignExtend64,
        (false, false) => NarrowValueMode::ZeroExtend64,
    };
    let inputs = [InsnInput { insn, input: 0 }, InsnInput { insn, input: 1 }];
    let ty = ctx.input_ty(insn, 0);
    let rn = put_input_in_reg(ctx, inputs[0], narrow_mode);
    if is_signed {
        let op = choose_32_64(ty, CmpOp::CmpS32, CmpOp::CmpS64);
        // Try matching immediate operand.
        if let Some(imm) = input_matches_simm16(ctx, inputs[1]) {
            return ctx.emit(Inst::CmpRSImm16 { op, rn, imm });
        }
        if let Some(imm) = input_matches_simm32(ctx, inputs[1]) {
            return ctx.emit(Inst::CmpRSImm32 { op, rn, imm });
        }
        // If sinking memory loads is allowed, try matching memory operand.
        if may_sink_memory {
            if let Some(mem) = input_matches_mem(ctx, inputs[1]) {
                return ctx.emit(Inst::CmpRX { op, rn, mem });
            }
            if let Some(mem) = input_matches_sext16_mem(ctx, inputs[1]) {
                let op = choose_32_64(ty, CmpOp::CmpS32Ext16, CmpOp::CmpS64Ext16);
                return ctx.emit(Inst::CmpRX { op, rn, mem });
            }
            if let Some(mem) = input_matches_sext32_mem(ctx, inputs[1]) {
                return ctx.emit(Inst::CmpRX {
                    op: CmpOp::CmpS64Ext32,
                    rn,
                    mem,
                });
            }
        }
        // Try matching sign-extension in register.
        if let Some(rm) = input_matches_sext32_reg(ctx, inputs[1]) {
            return ctx.emit(Inst::CmpRR {
                op: CmpOp::CmpS64Ext32,
                rn,
                rm,
            });
        }
        // If no special case matched above, fall back to a register compare.
        let rm = put_input_in_reg(ctx, inputs[1], narrow_mode);
        return ctx.emit(Inst::CmpRR { op, rn, rm });
    } else {
        let op = choose_32_64(ty, CmpOp::CmpL32, CmpOp::CmpL64);
        // Try matching immediate operand.
        if let Some(imm) = input_matches_uimm32(ctx, inputs[1]) {
            return ctx.emit(Inst::CmpRUImm32 { op, rn, imm });
        }
        // If sinking memory loads is allowed, try matching memory operand.
        if may_sink_memory {
            if let Some(mem) = input_matches_mem(ctx, inputs[1]) {
                return ctx.emit(Inst::CmpRX { op, rn, mem });
            }
            if let Some(mem) = input_matches_uext16_mem(ctx, inputs[1]) {
                match &mem {
                    &MemArg::Symbol { .. } => {
                        let op = choose_32_64(ty, CmpOp::CmpL32Ext16, CmpOp::CmpL64Ext16);
                        return ctx.emit(Inst::CmpRX { op, rn, mem });
                    }
                    _ => {
                        let reg_ty = choose_32_64(ty, types::I32, types::I64);
                        let rm = extend_memory_to_reg(ctx, mem, ty, reg_ty, false);
                        return ctx.emit(Inst::CmpRR { op, rn, rm });
                    }
                }
            }
            if let Some(mem) = input_matches_uext32_mem(ctx, inputs[1]) {
                return ctx.emit(Inst::CmpRX {
                    op: CmpOp::CmpL64Ext32,
                    rn,
                    mem,
                });
            }
        }
        // Try matching zero-extension in register.
        if let Some(rm) = input_matches_uext32_reg(ctx, inputs[1]) {
            return ctx.emit(Inst::CmpRR {
                op: CmpOp::CmpL64Ext32,
                rn,
                rm,
            });
        }
        // If no special case matched above, fall back to a register compare.
        let rm = put_input_in_reg(ctx, inputs[1], narrow_mode);
        return ctx.emit(Inst::CmpRR { op, rn, rm });
    }
}

fn lower_fcmp_to_flags<C: LowerCtx<I = Inst>>(ctx: &mut C, insn: IRInst) {
    let ty = ctx.input_ty(insn, 0);
    let bits = ty_bits(ty);
    let inputs = [InsnInput { insn, input: 0 }, InsnInput { insn, input: 1 }];
    let rn = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None);
    let rm = put_input_in_reg(ctx, inputs[1], NarrowValueMode::None);
    match bits {
        32 => {
            ctx.emit(Inst::FpuCmp32 { rn, rm });
        }
        64 => {
            ctx.emit(Inst::FpuCmp64 { rn, rm });
        }
        _ => panic!("Unknown float size"),
    }
}

fn lower_boolean_to_flags<C: LowerCtx<I = Inst>>(ctx: &mut C, input: InsnInput) -> Cond {
    if let Some(icmp_insn) = input_matches_insn_via_conv(ctx, input, Opcode::Icmp, Opcode::Bint) {
        // FIXME: If the Icmp (and Bint) only have a single use, we can still allow sinking memory
        let may_sink_memory = false;
        let condcode = ctx.data(icmp_insn).cond_code().unwrap();
        let is_signed = condcode_is_signed(condcode);
        lower_icmp_to_flags(ctx, icmp_insn, is_signed, may_sink_memory);
        Cond::from_intcc(condcode)
    } else if let Some(fcmp_insn) =
        input_matches_insn_via_conv(ctx, input, Opcode::Fcmp, Opcode::Bint)
    {
        let condcode = ctx.data(fcmp_insn).fp_cond_code().unwrap();
        lower_fcmp_to_flags(ctx, fcmp_insn);
        Cond::from_floatcc(condcode)
    } else {
        let ty = ctx.input_ty(input.insn, input.input);
        let narrow_mode = if ty.bits() < 32 {
            NarrowValueMode::ZeroExtend32
        } else {
            NarrowValueMode::None
        };
        let rn = put_input_in_reg(ctx, input, narrow_mode);
        let op = choose_32_64(ty, CmpOp::CmpS32, CmpOp::CmpS64);
        ctx.emit(Inst::CmpRSImm16 { op, rn, imm: 0 });
        Cond::from_intcc(IntCC::NotEqual)
    }
}

fn lower_flags_to_bool_result<C: LowerCtx<I = Inst>>(
    ctx: &mut C,
    cond: Cond,
    rd: Writable<Reg>,
    ty: Type,
) {
    if ty_bits(ty) == 1 {
        lower_constant_u32(ctx, rd, 0);
        ctx.emit(Inst::CMov32SImm16 { rd, cond, imm: 1 });
    } else if ty_bits(ty) < 64 {
        lower_constant_u32(ctx, rd, 0);
        ctx.emit(Inst::CMov32SImm16 { rd, cond, imm: -1 });
    } else {
        lower_constant_u64(ctx, rd, 0);
        ctx.emit(Inst::CMov64SImm16 { rd, cond, imm: -1 });
    }
}

//============================================================================
// Lowering: main entry point for lowering an instruction

fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
    ctx: &mut C,
    insn: IRInst,
    flags: &Flags,
    isa_flags: &s390x_settings::Flags,
) -> CodegenResult<()> {
    let op = ctx.data(insn).opcode();
    let inputs: SmallVec<[InsnInput; 4]> = (0..ctx.num_inputs(insn))
        .map(|i| InsnInput { insn, input: i })
        .collect();
    let outputs: SmallVec<[InsnOutput; 2]> =
(0..ctx.num_outputs(insn)) .map(|i| InsnOutput { insn, output: i }) .collect(); let ty = if outputs.len() > 0 { Some(ctx.output_ty(insn, 0)) } else { None }; match op { Opcode::Nop => { // Nothing. } Opcode::Copy | Opcode::Ireduce | Opcode::Breduce => { // Smaller ints / bools have the high bits undefined, so any reduce // operation is simply a copy. let rd = get_output_reg(ctx, outputs[0]).only_reg().unwrap(); let rn = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None); let ty = ctx.input_ty(insn, 0); ctx.emit(Inst::gen_move(rd, rn, ty)); } Opcode::Iconst | Opcode::Bconst | Opcode::Null => { let value = ctx.get_constant(insn).unwrap(); let rd = get_output_reg(ctx, outputs[0]).only_reg().unwrap(); let ty = ty.unwrap(); if ty.bits() <= 32 { lower_constant_u32(ctx, rd, value as u32); } else { lower_constant_u64(ctx, rd, value); } } Opcode::F32const => { let value = f32::from_bits(ctx.get_constant(insn).unwrap() as u32); let rd = get_output_reg(ctx, outputs[0]).only_reg().unwrap(); lower_constant_f32(ctx, rd, value); } Opcode::F64const => { let value = f64::from_bits(ctx.get_constant(insn).unwrap()); let rd = get_output_reg(ctx, outputs[0]).only_reg().unwrap(); lower_constant_f64(ctx, rd, value); } Opcode::Iadd => { let ty = ty.unwrap(); let alu_op = choose_32_64(ty, ALUOp::Add32, ALUOp::Add64); let rd = get_output_reg(ctx, outputs[0]).only_reg().unwrap(); let rn = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None); if let Some(imm) = input_matches_simm16(ctx, inputs[1]) { ctx.emit(Inst::AluRRSImm16 { alu_op, rd, rn, imm, }); } else if let Some(imm) = input_matches_simm32(ctx, inputs[1]) { ctx.emit(Inst::gen_move(rd, rn, ty)); ctx.emit(Inst::AluRSImm32 { alu_op, rd, imm }); } else if let Some(mem) = input_matches_mem(ctx, inputs[1]) { ctx.emit(Inst::gen_move(rd, rn, ty)); ctx.emit(Inst::AluRX { alu_op, rd, mem }); } else if let Some(mem) = input_matches_sext16_mem(ctx, inputs[1]) { let alu_op = choose_32_64(ty, ALUOp::Add32Ext16, ALUOp::Add64Ext16); ctx.emit(Inst::gen_move(rd, rn, ty)); ctx.emit(Inst::AluRX { alu_op, rd, mem }); } else if let Some(mem) = input_matches_sext32_mem(ctx, inputs[1]) { ctx.emit(Inst::gen_move(rd, rn, ty)); ctx.emit(Inst::AluRX { alu_op: ALUOp::Add64Ext32, rd, mem, }); } else if let Some(rm) = input_matches_sext32_reg(ctx, inputs[1]) { ctx.emit(Inst::gen_move(rd, rn, ty)); ctx.emit(Inst::AluRR { alu_op: ALUOp::Add64Ext32, rd, rm, }); } else { let rm = put_input_in_reg(ctx, inputs[1], NarrowValueMode::None); ctx.emit(Inst::AluRRR { alu_op, rd, rn, rm }); } } Opcode::Isub => { let ty = ty.unwrap(); let alu_op = choose_32_64(ty, ALUOp::Sub32, ALUOp::Sub64); let neg_op = choose_32_64(ty, ALUOp::Add32, ALUOp::Add64); let rd = get_output_reg(ctx, outputs[0]).only_reg().unwrap(); let rn = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None); if let Some(imm) = negated_input_matches_simm16(ctx, inputs[1]) { ctx.emit(Inst::AluRRSImm16 { alu_op: neg_op, rd, rn, imm, }); } else if let Some(imm) = negated_input_matches_simm32(ctx, inputs[1]) { ctx.emit(Inst::gen_move(rd, rn, ty)); ctx.emit(Inst::AluRSImm32 { alu_op: neg_op, rd, imm, }); } else if let Some(mem) = input_matches_mem(ctx, inputs[1]) { ctx.emit(Inst::gen_move(rd, rn, ty)); ctx.emit(Inst::AluRX { alu_op, rd, mem }); } else if let Some(mem) = input_matches_sext16_mem(ctx, inputs[1]) { let alu_op = choose_32_64(ty, ALUOp::Sub32Ext16, ALUOp::Sub64Ext16); ctx.emit(Inst::gen_move(rd, rn, ty)); ctx.emit(Inst::AluRX { alu_op, rd, mem }); } else if let Some(mem) = input_matches_sext32_mem(ctx, inputs[1]) { 
ctx.emit(Inst::gen_move(rd, rn, ty)); ctx.emit(Inst::AluRX { alu_op: ALUOp::Sub64Ext32, rd, mem, }); } else if let Some(rm) = input_matches_sext32_reg(ctx, inputs[1]) { ctx.emit(Inst::gen_move(rd, rn, ty)); ctx.emit(Inst::AluRR { alu_op: ALUOp::Sub64Ext32, rd, rm, }); } else { let rm = put_input_in_reg(ctx, inputs[1], NarrowValueMode::None); ctx.emit(Inst::AluRRR { alu_op, rd, rn, rm }); } } Opcode::IaddIfcout => { let ty = ty.unwrap(); assert!(ty == types::I32 || ty == types::I64); // Emit an ADD LOGICAL instruction, which sets the condition code // to indicate an (unsigned) carry bit. let alu_op = choose_32_64(ty, ALUOp::AddLogical32, ALUOp::AddLogical64); let rd = get_output_reg(ctx, outputs[0]).only_reg().unwrap(); let rn = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None); if let Some(imm) = input_matches_uimm32(ctx, inputs[1]) { ctx.emit(Inst::gen_move(rd, rn, ty)); ctx.emit(Inst::AluRUImm32 { alu_op, rd, imm }); } else if let Some(mem) = input_matches_mem(ctx, inputs[1]) { ctx.emit(Inst::gen_move(rd, rn, ty)); ctx.emit(Inst::AluRX { alu_op, rd, mem }); } else if let Some(mem) = input_matches_uext32_mem(ctx, inputs[1]) { ctx.emit(Inst::gen_move(rd, rn, ty)); ctx.emit(Inst::AluRX { alu_op: ALUOp::AddLogical64Ext32, rd, mem, }); } else if let Some(rm) = input_matches_uext32_reg(ctx, inputs[1]) { ctx.emit(Inst::gen_move(rd, rn, ty)); ctx.emit(Inst::AluRR { alu_op: ALUOp::AddLogical64Ext32, rd, rm, }); } else { let rm = put_input_in_reg(ctx, inputs[1], NarrowValueMode::None); ctx.emit(Inst::AluRRR { alu_op, rd, rn, rm }); } } Opcode::UaddSat | Opcode::SaddSat => unimplemented!(), Opcode::UsubSat | Opcode::SsubSat => unimplemented!(), Opcode::Iabs => { let ty = ty.unwrap(); let op = choose_32_64(ty, UnaryOp::Abs32, UnaryOp::Abs64); let rd = get_output_reg(ctx, outputs[0]).only_reg().unwrap(); if let Some(rn) = input_matches_sext32_reg(ctx, inputs[0]) { ctx.emit(Inst::UnaryRR { op: UnaryOp::Abs64Ext32, rd, rn, }); } else { let narrow_mode = if ty.bits() < 32 { NarrowValueMode::SignExtend32 } else { NarrowValueMode::None }; let rn = put_input_in_reg(ctx, inputs[0], narrow_mode); ctx.emit(Inst::UnaryRR { op, rd, rn }); } } Opcode::Ineg => { let ty = ty.unwrap(); let op = choose_32_64(ty, UnaryOp::Neg32, UnaryOp::Neg64); let rd = get_output_reg(ctx, outputs[0]).only_reg().unwrap(); if let Some(rn) = input_matches_sext32_reg(ctx, inputs[0]) { ctx.emit(Inst::UnaryRR { op: UnaryOp::Neg64Ext32, rd, rn, }); } else { let rn = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None); ctx.emit(Inst::UnaryRR { op, rd, rn }); } } Opcode::Imul => { let ty = ty.unwrap(); let alu_op = choose_32_64(ty, ALUOp::Mul32, ALUOp::Mul64); let rd = get_output_reg(ctx, outputs[0]).only_reg().unwrap(); let rn = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None); if let Some(imm) = input_matches_simm16(ctx, inputs[1]) { ctx.emit(Inst::gen_move(rd, rn, ty)); ctx.emit(Inst::AluRSImm16 { alu_op, rd, imm }); } else if let Some(imm) = input_matches_simm32(ctx, inputs[1]) { ctx.emit(Inst::gen_move(rd, rn, ty)); ctx.emit(Inst::AluRSImm32 { alu_op, rd, imm }); } else if let Some(mem) = input_matches_mem(ctx, inputs[1]) { ctx.emit(Inst::gen_move(rd, rn, ty)); ctx.emit(Inst::AluRX { alu_op, rd, mem }); } else if let Some(mem) = input_matches_sext16_mem(ctx, inputs[1]) { let alu_op = choose_32_64(ty, ALUOp::Mul32Ext16, ALUOp::Mul64Ext16); ctx.emit(Inst::gen_move(rd, rn, ty)); ctx.emit(Inst::AluRX { alu_op, rd, mem }); } else if let Some(mem) = input_matches_sext32_mem(ctx, inputs[1]) { ctx.emit(Inst::gen_move(rd, rn, 
ty)); ctx.emit(Inst::AluRX { alu_op: ALUOp::Mul64Ext32, rd, mem, }); } else if let Some(rm) = input_matches_sext32_reg(ctx, inputs[1]) { ctx.emit(Inst::gen_move(rd, rn, ty)); ctx.emit(Inst::AluRR { alu_op: ALUOp::Mul64Ext32, rd, rm, }); } else { let rm = put_input_in_reg(ctx, inputs[1], NarrowValueMode::None); ctx.emit(Inst::AluRRR { alu_op, rd, rn, rm }); } } Opcode::Umulhi | Opcode::Smulhi => { let rd = get_output_reg(ctx, outputs[0]).only_reg().unwrap(); let is_signed = op == Opcode::Smulhi; let input_ty = ctx.input_ty(insn, 0); assert!(ctx.input_ty(insn, 1) == input_ty); assert!(ctx.output_ty(insn, 0) == input_ty); match input_ty { types::I64 => { let rn = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None); let rm = put_input_in_reg(ctx, inputs[1], NarrowValueMode::None); if is_signed { ctx.emit(Inst::SMulWide { rn, rm }); ctx.emit(Inst::gen_move(rd, gpr(0), input_ty)); } else { ctx.emit(Inst::gen_move(writable_gpr(1), rm, input_ty)); ctx.emit(Inst::UMulWide { rn }); ctx.emit(Inst::gen_move(rd, gpr(0), input_ty)); } } types::I32 => { let narrow_mode = if is_signed { NarrowValueMode::SignExtend64 } else { NarrowValueMode::ZeroExtend64 }; let rn = put_input_in_reg(ctx, inputs[0], narrow_mode); let rm = put_input_in_reg(ctx, inputs[1], narrow_mode); ctx.emit(Inst::AluRRR { alu_op: ALUOp::Mul64, rd, rn, rm, }); let shift_op = if is_signed { ShiftOp::AShR64 } else { ShiftOp::LShR64 }; ctx.emit(Inst::ShiftRR { shift_op, rd, rn: rd.to_reg(), shift_imm: SImm20::maybe_from_i64(32).unwrap(), shift_reg: None, }); } types::I16 | types::I8 => { let narrow_mode = if is_signed { NarrowValueMode::SignExtend32 } else { NarrowValueMode::ZeroExtend32 }; let rn = put_input_in_reg(ctx, inputs[0], narrow_mode); let rm = put_input_in_reg(ctx, inputs[1], narrow_mode); ctx.emit(Inst::AluRRR { alu_op: ALUOp::Mul32, rd, rn, rm, }); let shift_op = if is_signed { ShiftOp::AShR32 } else { ShiftOp::LShR32 }; let shift_amt = match input_ty { types::I16 => 16, types::I8 => 8, _ => unreachable!(), }; ctx.emit(Inst::ShiftRR { shift_op, rd, rn: rd.to_reg(), shift_imm: SImm20::maybe_from_i64(shift_amt).unwrap(), shift_reg: None, }); } _ => { panic!("Unsupported argument type for umulhi/smulhi: {}", input_ty); } } } Opcode::Udiv | Opcode::Urem => { let rd = get_output_reg(ctx, outputs[0]).only_reg().unwrap(); let ty = ty.unwrap(); let rn = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None); if ty_bits(ty) <= 32 { lower_constant_u32(ctx, writable_gpr(0), 0); if ty_bits(ty) < 32 { ctx.emit(Inst::Extend { rd: writable_gpr(1), rn, signed: false, from_bits: ty_bits(ty) as u8, to_bits: 32, }); } else { ctx.emit(Inst::mov32(writable_gpr(1), rn)); } } else { lower_constant_u64(ctx, writable_gpr(0), 0); ctx.emit(Inst::mov64(writable_gpr(1), rn)); } let narrow_mode = if ty.bits() < 32 { NarrowValueMode::ZeroExtend32 } else { NarrowValueMode::None }; let rm = put_input_in_reg(ctx, inputs[1], narrow_mode); if input_maybe_imm(ctx, inputs[1], 0) && flags.avoid_div_traps() { ctx.emit(Inst::CmpTrapRSImm16 { op: choose_32_64(ty, CmpOp::CmpS32, CmpOp::CmpS64), rn: rm, imm: 0, cond: Cond::from_intcc(IntCC::Equal), trap_code: TrapCode::IntegerDivisionByZero, }); } if ty_bits(ty) <= 32 { ctx.emit(Inst::UDivMod32 { rn: rm }); } else { ctx.emit(Inst::UDivMod64 { rn: rm }); } if op == Opcode::Udiv { ctx.emit(Inst::gen_move(rd, gpr(1), ty)); } else { ctx.emit(Inst::gen_move(rd, gpr(0), ty)); } } Opcode::Sdiv | Opcode::Srem => { let rd = get_output_reg(ctx, outputs[0]).only_reg().unwrap(); let ty = ty.unwrap(); let rn = 
put_input_in_reg(ctx, inputs[0], NarrowValueMode::None); if ty_bits(ty) < 64 { ctx.emit(Inst::Extend { rd: writable_gpr(1), rn, signed: true, from_bits: ty_bits(ty) as u8, to_bits: 64, }); } else { ctx.emit(Inst::mov64(writable_gpr(1), rn)); } let narrow_mode = if ty.bits() < 32 { NarrowValueMode::SignExtend32 } else { NarrowValueMode::None }; let rm = put_input_in_reg(ctx, inputs[1], narrow_mode); if input_maybe_imm(ctx, inputs[1], 0) && flags.avoid_div_traps() { ctx.emit(Inst::CmpTrapRSImm16 { op: choose_32_64(ty, CmpOp::CmpS32, CmpOp::CmpS64), rn: rm, imm: 0, cond: Cond::from_intcc(IntCC::Equal), trap_code: TrapCode::IntegerDivisionByZero, }); } if input_maybe_imm(ctx, inputs[1], 0xffff_ffff_ffff_ffff) { if op == Opcode::Sdiv { let tmp = ctx.alloc_tmp(ty).only_reg().unwrap(); if ty_bits(ty) <= 32 { lower_constant_u32(ctx, tmp, (1 << (ty_bits(ty) - 1)) - 1); } else { lower_constant_u64(ctx, tmp, (1 << (ty_bits(ty) - 1)) - 1); } ctx.emit(Inst::AluRRR { alu_op: choose_32_64(ty, ALUOp::Xor32, ALUOp::Xor64), rd: tmp, rn: tmp.to_reg(), rm: gpr(1), }); ctx.emit(Inst::AluRRR { alu_op: choose_32_64(ty, ALUOp::And32, ALUOp::And64), rd: tmp, rn: tmp.to_reg(), rm, }); ctx.emit(Inst::CmpTrapRSImm16 { op: choose_32_64(ty, CmpOp::CmpS32, CmpOp::CmpS64), rn: tmp.to_reg(), imm: -1, cond: Cond::from_intcc(IntCC::Equal), trap_code: TrapCode::IntegerOverflow, }); } else { if ty_bits(ty) > 32 { ctx.emit(Inst::CmpRSImm16 { op: CmpOp::CmpS64, rn: rm, imm: -1, }); ctx.emit(Inst::CMov64SImm16 { rd: writable_gpr(1), cond: Cond::from_intcc(IntCC::Equal), imm: 0, }); } } } if ty_bits(ty) <= 32 { ctx.emit(Inst::SDivMod32 { rn: rm }); } else { ctx.emit(Inst::SDivMod64 { rn: rm }); } if op == Opcode::Sdiv { ctx.emit(Inst::gen_move(rd, gpr(1), ty)); } else { ctx.emit(Inst::gen_move(rd, gpr(0), ty)); } } Opcode::Uextend | Opcode::Sextend => { let ty = ty.unwrap(); let to_bits = ty_bits(ty) as u8; let to_bits = std::cmp::max(32, to_bits); let narrow_mode = match (op, to_bits) { (Opcode::Uextend, 32) => NarrowValueMode::ZeroExtend32, (Opcode::Uextend, 64) => NarrowValueMode::ZeroExtend64, (Opcode::Sextend, 32) => NarrowValueMode::SignExtend32, (Opcode::Sextend, 64) => NarrowValueMode::SignExtend64, _ => unreachable!(), }; let rn = put_input_in_reg(ctx, inputs[0], narrow_mode); let rd = get_output_reg(ctx, outputs[0]).only_reg().unwrap(); ctx.emit(Inst::gen_move(rd, rn, ty)); } Opcode::Ishl | Opcode::Ushr | Opcode::Sshr => { let ty = ty.unwrap(); let size = ty_bits(ty); let narrow_mode = match (op, size) { (Opcode::Ishl, _) => NarrowValueMode::None, (Opcode::Ushr, 64) => NarrowValueMode::ZeroExtend64, (Opcode::Ushr, _) => NarrowValueMode::ZeroExtend32, (Opcode::Sshr, 64) => NarrowValueMode::SignExtend64, (Opcode::Sshr, _) => NarrowValueMode::SignExtend32, _ => unreachable!(), }; let shift_op = match op { Opcode::Ishl => choose_32_64(ty, ShiftOp::LShL32, ShiftOp::LShL64), Opcode::Ushr => choose_32_64(ty, ShiftOp::LShR32, ShiftOp::LShR64), Opcode::Sshr => choose_32_64(ty, ShiftOp::AShR32, ShiftOp::AShR64), _ => unreachable!(), }; let rd = get_output_reg(ctx, outputs[0]).only_reg().unwrap(); let rn = put_input_in_reg(ctx, inputs[0], narrow_mode); if let Some(imm) = input_matches_const(ctx, inputs[1]) { let imm = imm & if size < 64 { 31 } else { 63 }; let shift_imm = SImm20::maybe_from_i64(imm as i64).unwrap(); let shift_reg = None; ctx.emit(Inst::ShiftRR { shift_op, rd, rn, shift_imm, shift_reg, }); } else { let rm = put_input_in_reg(ctx, inputs[1], NarrowValueMode::None); let shift_imm = SImm20::zero(); let shift_reg = if 
size < 64 { let tmp = ctx.alloc_tmp(types::I64).only_reg().unwrap(); ctx.emit(Inst::gen_move(tmp, rm, types::I64)); ctx.emit(Inst::AluRUImm16Shifted { alu_op: ALUOp::And64, rd: tmp, imm: UImm16Shifted::maybe_from_u64(31).unwrap(), }); Some(tmp.to_reg()) } else { Some(rm) }; ctx.emit(Inst::ShiftRR { shift_op, rd, rn, shift_imm, shift_reg, }); } } Opcode::Rotr | Opcode::Rotl => { // s390x doesn't have a right-rotate instruction, but a right rotation of K places is // effectively a left rotation of N - K places, if N is the integer's bit size. We // implement right rotations with this trick. // // For a 32-bit or 64-bit rotate-left, we can use the ROR instruction directly. // // For a < 32-bit rotate-left, we synthesize this as: // // rotr rd, rn, rm // // => // // zero-extend rn, <32-or-64> // and tmp_masked_rm, rm, // sub tmp1, tmp_masked_rm, // sub tmp1, zero, tmp1 ; neg // lsr tmp2, rn, tmp_masked_rm // lsl rd, rn, tmp1 // orr rd, rd, tmp2 // // For a constant amount, we can instead do: // // zero-extend rn, <32-or-64> // lsr tmp2, rn, # // lsl rd, rn, // orr rd, rd, tmp2 let is_rotr = op == Opcode::Rotr; let ty = ty.unwrap(); let ty_bits_size = ty_bits(ty) as u64; let rd = get_output_reg(ctx, outputs[0]).only_reg().unwrap(); let rn = put_input_in_reg( ctx, inputs[0], if ty_bits_size <= 32 { NarrowValueMode::ZeroExtend32 } else { NarrowValueMode::ZeroExtend64 }, ); if ty_bits_size == 32 || ty_bits_size == 64 { let shift_op = choose_32_64(ty, ShiftOp::RotL32, ShiftOp::RotL64); if let Some(imm) = input_matches_const(ctx, inputs[1]) { let shiftcount = imm & (ty_bits_size - 1); let shiftcount = if is_rotr { ty_bits_size - shiftcount } else { shiftcount }; ctx.emit(Inst::ShiftRR { shift_op, rd, rn, shift_imm: SImm20::maybe_from_i64(shiftcount as i64).unwrap(), shift_reg: None, }); } else { let rm = put_input_in_reg(ctx, inputs[1], NarrowValueMode::None); let rm = if is_rotr { // Really ty_bits_size - rn, but the upper bits of the result are // ignored (because of the implicit masking done by the instruction), // so this is equivalent to negating the input. 
let op = choose_32_64(ty, UnaryOp::Neg32, UnaryOp::Neg64); let tmp = ctx.alloc_tmp(ty).only_reg().unwrap(); ctx.emit(Inst::UnaryRR { op, rd: tmp, rn: rm, }); tmp.to_reg() } else { rm }; ctx.emit(Inst::ShiftRR { shift_op, rd, rn, shift_imm: SImm20::zero(), shift_reg: Some(rm), }); } } else { debug_assert!(ty_bits_size < 32); if let Some(imm) = input_matches_const(ctx, inputs[1]) { let rot_count = imm & (ty_bits_size - 1); let (lshl_count, lshr_count) = if is_rotr { (ty_bits_size - rot_count, rot_count) } else { (rot_count, ty_bits_size - rot_count) }; let tmp1 = ctx.alloc_tmp(types::I32).only_reg().unwrap(); ctx.emit(Inst::ShiftRR { shift_op: ShiftOp::LShL32, rd: tmp1, rn, shift_imm: SImm20::maybe_from_i64(lshl_count as i64).unwrap(), shift_reg: None, }); let tmp2 = ctx.alloc_tmp(types::I32).only_reg().unwrap(); ctx.emit(Inst::ShiftRR { shift_op: ShiftOp::LShR32, rd: tmp2, rn, shift_imm: SImm20::maybe_from_i64(lshr_count as i64).unwrap(), shift_reg: None, }); ctx.emit(Inst::AluRRR { alu_op: ALUOp::Orr32, rd, rn: tmp1.to_reg(), rm: tmp2.to_reg(), }); } else { let rm = put_input_in_reg(ctx, inputs[1], NarrowValueMode::None); let tmp1 = ctx.alloc_tmp(types::I32).only_reg().unwrap(); let tmp2 = ctx.alloc_tmp(types::I32).only_reg().unwrap(); ctx.emit(Inst::mov32(tmp1, rm)); ctx.emit(Inst::UnaryRR { op: UnaryOp::Neg32, rd: tmp2, rn: rm, }); ctx.emit(Inst::AluRUImm16Shifted { alu_op: ALUOp::And32, rd: tmp1, imm: UImm16Shifted::maybe_from_u64(ty_bits_size - 1).unwrap(), }); ctx.emit(Inst::AluRUImm16Shifted { alu_op: ALUOp::And32, rd: tmp2, imm: UImm16Shifted::maybe_from_u64(ty_bits_size - 1).unwrap(), }); let (lshr, lshl) = if is_rotr { (tmp2, tmp1) } else { (tmp1, tmp2) }; ctx.emit(Inst::ShiftRR { shift_op: ShiftOp::LShL32, rd: lshl, rn, shift_imm: SImm20::zero(), shift_reg: Some(lshl.to_reg()), }); ctx.emit(Inst::ShiftRR { shift_op: ShiftOp::LShR32, rd: lshr, rn, shift_imm: SImm20::zero(), shift_reg: Some(lshr.to_reg()), }); ctx.emit(Inst::AluRRR { alu_op: ALUOp::Orr32, rd, rn: lshl.to_reg(), rm: lshr.to_reg(), }); } } } Opcode::Bnot => { let ty = ty.unwrap(); let rd = get_output_reg(ctx, outputs[0]).only_reg().unwrap(); let rn = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None); if isa_flags.has_mie2() { ctx.emit(Inst::AluRRR { alu_op: choose_32_64(ty, ALUOp::OrrNot32, ALUOp::OrrNot64), rd, rn, rm: rn, }); } else { ctx.emit(Inst::gen_move(rd, rn, ty)); lower_bnot(ctx, ty, rd); } } Opcode::Band => { let ty = ty.unwrap(); let alu_op = choose_32_64(ty, ALUOp::And32, ALUOp::And64); let rd = get_output_reg(ctx, outputs[0]).only_reg().unwrap(); let rn = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None); if let Some(imm) = input_matches_uimm16shifted_inv(ctx, inputs[1]) { ctx.emit(Inst::gen_move(rd, rn, ty)); ctx.emit(Inst::AluRUImm16Shifted { alu_op, rd, imm }); } else if let Some(imm) = input_matches_uimm32shifted_inv(ctx, inputs[1]) { ctx.emit(Inst::gen_move(rd, rn, ty)); ctx.emit(Inst::AluRUImm32Shifted { alu_op, rd, imm }); } else if let Some(mem) = input_matches_mem(ctx, inputs[1]) { ctx.emit(Inst::gen_move(rd, rn, ty)); ctx.emit(Inst::AluRX { alu_op, rd, mem }); } else { let rm = put_input_in_reg(ctx, inputs[1], NarrowValueMode::None); ctx.emit(Inst::AluRRR { alu_op, rd, rn, rm }); } } Opcode::Bor => { let ty = ty.unwrap(); let alu_op = choose_32_64(ty, ALUOp::Orr32, ALUOp::Orr64); let rd = get_output_reg(ctx, outputs[0]).only_reg().unwrap(); let rn = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None); if let Some(imm) = input_matches_uimm16shifted(ctx, inputs[1]) { 
ctx.emit(Inst::gen_move(rd, rn, ty)); ctx.emit(Inst::AluRUImm16Shifted { alu_op, rd, imm }); } else if let Some(imm) = input_matches_uimm32shifted(ctx, inputs[1]) { ctx.emit(Inst::gen_move(rd, rn, ty)); ctx.emit(Inst::AluRUImm32Shifted { alu_op, rd, imm }); } else if let Some(mem) = input_matches_mem(ctx, inputs[1]) { ctx.emit(Inst::gen_move(rd, rn, ty)); ctx.emit(Inst::AluRX { alu_op, rd, mem }); } else { let rm = put_input_in_reg(ctx, inputs[1], NarrowValueMode::None); ctx.emit(Inst::AluRRR { alu_op, rd, rn, rm }); } } Opcode::Bxor => { let ty = ty.unwrap(); let alu_op = choose_32_64(ty, ALUOp::Xor32, ALUOp::Xor64); let rd = get_output_reg(ctx, outputs[0]).only_reg().unwrap(); let rn = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None); if let Some(imm) = input_matches_uimm32shifted(ctx, inputs[1]) { ctx.emit(Inst::gen_move(rd, rn, ty)); ctx.emit(Inst::AluRUImm32Shifted { alu_op, rd, imm }); } else if let Some(mem) = input_matches_mem(ctx, inputs[1]) { ctx.emit(Inst::gen_move(rd, rn, ty)); ctx.emit(Inst::AluRX { alu_op, rd, mem }); } else { let rm = put_input_in_reg(ctx, inputs[1], NarrowValueMode::None); ctx.emit(Inst::AluRRR { alu_op, rd, rn, rm }); } } Opcode::BandNot | Opcode::BorNot | Opcode::BxorNot => { let ty = ty.unwrap(); let alu_op = match (op, isa_flags.has_mie2()) { (Opcode::BandNot, true) => choose_32_64(ty, ALUOp::AndNot32, ALUOp::AndNot64), (Opcode::BorNot, true) => choose_32_64(ty, ALUOp::OrrNot32, ALUOp::OrrNot64), (Opcode::BxorNot, true) => choose_32_64(ty, ALUOp::XorNot32, ALUOp::XorNot64), (Opcode::BandNot, false) => choose_32_64(ty, ALUOp::And32, ALUOp::And64), (Opcode::BorNot, false) => choose_32_64(ty, ALUOp::Orr32, ALUOp::Orr64), (Opcode::BxorNot, false) => choose_32_64(ty, ALUOp::Xor32, ALUOp::Xor64), _ => unreachable!(), }; let rd = get_output_reg(ctx, outputs[0]).only_reg().unwrap(); let rn = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None); let rm = put_input_in_reg(ctx, inputs[1], NarrowValueMode::None); ctx.emit(Inst::AluRRR { alu_op, rd, rn, rm }); if !isa_flags.has_mie2() { lower_bnot(ctx, ty, rd); } } Opcode::Bitselect => { let ty = ty.unwrap(); let tmp = ctx.alloc_tmp(types::I64).only_reg().unwrap(); let rd = get_output_reg(ctx, outputs[0]).only_reg().unwrap(); let rcond = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None); let rn = put_input_in_reg(ctx, inputs[1], NarrowValueMode::None); let rm = put_input_in_reg(ctx, inputs[2], NarrowValueMode::None); ctx.emit(Inst::AluRRR { alu_op: choose_32_64(ty, ALUOp::And32, ALUOp::And64), rd: tmp, rn, rm: rcond, }); if isa_flags.has_mie2() { ctx.emit(Inst::AluRRR { alu_op: choose_32_64(ty, ALUOp::AndNot32, ALUOp::AndNot64), rd, rn: rm, rm: rcond, }); } else { ctx.emit(Inst::AluRRR { alu_op: choose_32_64(ty, ALUOp::And32, ALUOp::And64), rd, rn: rm, rm: rcond, }); lower_bnot(ctx, ty, rd); } ctx.emit(Inst::AluRRR { alu_op: choose_32_64(ty, ALUOp::Orr32, ALUOp::Orr64), rd, rn: rd.to_reg(), rm: tmp.to_reg(), }); } Opcode::Bextend | Opcode::Bmask => { // Bextend and Bmask both simply sign-extend. This works for: // - Bextend, because booleans are stored as 0 / -1, so we // sign-extend the -1 to a -1 in the wider width. // - Bmask, because the resulting integer mask value must be // all-ones (-1) if the argument is true. // // For a sign-extension from a 1-bit value (Case 1 below), we need // to do things a bit specially, because the ISA does not have a // 1-to-N-bit sign extension instruction. For 8-bit or wider // sources (Case 2 below), we do a sign extension normally. 
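            // Added illustrative note (assumption): e.g. extending a b1 `true` to b32 shifts the
            // value left by 31 and then arithmetic-shift-rights by 31, turning the low bit 1 into
            // the all-ones pattern 0xffff_ffff (-1), which is how wider booleans are represented.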
let from_ty = ctx.input_ty(insn, 0); let to_ty = ctx.output_ty(insn, 0); let from_bits = ty_bits(from_ty); let to_bits = ty_bits(to_ty); assert!( from_bits <= 64 && to_bits <= 64, "Vector Bextend not supported yet" ); if from_bits >= to_bits { // Just a move. let rd = get_output_reg(ctx, outputs[0]).only_reg().unwrap(); let rn = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None); let ty = ctx.input_ty(insn, 0); ctx.emit(Inst::gen_move(rd, rn, ty)); } else if from_bits == 1 { assert!(to_bits >= 8); // Case 1: 1-bit to N-bit extension: use a shift-left / // shift-right sequence to create a 0 / -1 result. let rd = get_output_reg(ctx, outputs[0]).only_reg().unwrap(); let rn = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None); let shl_op = choose_32_64(to_ty, ShiftOp::LShL32, ShiftOp::LShL64); let shr_op = choose_32_64(to_ty, ShiftOp::AShR32, ShiftOp::AShR64); let count = if to_bits > 32 { 63 } else { 31 }; ctx.emit(Inst::ShiftRR { shift_op: shl_op, rd, rn, shift_imm: SImm20::maybe_from_i64(count.into()).unwrap(), shift_reg: None, }); ctx.emit(Inst::ShiftRR { shift_op: shr_op, rd, rn: rd.to_reg(), shift_imm: SImm20::maybe_from_i64(count.into()).unwrap(), shift_reg: None, }); } else { // Case 2: 8-or-more-bit to N-bit extension: just sign-extend. A // `true` (all ones, or `-1`) will be extended to -1 with the // larger width. assert!(from_bits >= 8); let narrow_mode = if to_bits == 64 { NarrowValueMode::SignExtend64 } else { assert!(to_bits <= 32); NarrowValueMode::SignExtend32 }; let rn = put_input_in_reg(ctx, inputs[0], narrow_mode); let rd = get_output_reg(ctx, outputs[0]).only_reg().unwrap(); ctx.emit(Inst::gen_move(rd, rn, to_ty)); } } Opcode::Bint => { // Booleans are stored as all-zeroes (0) or all-ones (-1). We AND // out the LSB to give a 0 / 1-valued integer result. 
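            // Added illustrative note (assumption): e.g. a b8 `true` is stored as 0xff (-1);
            // masking with 1 yields the integer 1, while `false` (0) stays 0.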
let ty = ty.unwrap(); let rd = get_output_reg(ctx, outputs[0]).only_reg().unwrap(); let rn = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None); if ty_bits(ty) <= 16 { ctx.emit(Inst::gen_move(rd, rn, ty)); ctx.emit(Inst::AluRUImm16Shifted { alu_op: ALUOp::And32, rd, imm: UImm16Shifted::maybe_from_u64(1).unwrap(), }); } else if ty_bits(ty) <= 32 { ctx.emit(Inst::gen_move(rd, rn, ty)); ctx.emit(Inst::AluRUImm32Shifted { alu_op: ALUOp::And32, rd, imm: UImm32Shifted::maybe_from_u64(1).unwrap(), }); } else { let tmp = ctx.alloc_tmp(types::I64).only_reg().unwrap(); lower_constant_u64(ctx, tmp, 1); ctx.emit(Inst::AluRRR { alu_op: ALUOp::And64, rd, rn, rm: tmp.to_reg(), }); } } Opcode::Clz => { let rd = get_output_reg(ctx, outputs[0]).only_reg().unwrap(); let rn = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None); let ty = ty.unwrap(); let ty_bits_size = ty_bits(ty); let rn = if ty_bits_size < 64 { let tmp = ctx.alloc_tmp(types::I64).only_reg().unwrap(); ctx.emit(Inst::Extend { rd: tmp, rn, signed: false, from_bits: ty_bits_size as u8, to_bits: 64, }); tmp.to_reg() } else { rn }; ctx.emit(Inst::Flogr { rn }); ctx.emit(Inst::gen_move(rd, gpr(0), ty)); if ty_bits_size < 64 { ctx.emit(Inst::AluRSImm16 { alu_op: ALUOp::Add32, rd, imm: -(64 - ty_bits_size as i16), }); } } Opcode::Cls => { let rd = get_output_reg(ctx, outputs[0]).only_reg().unwrap(); let rn = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None); let ty = ty.unwrap(); let ty_bits_size = ty_bits(ty); let rn = if ty_bits_size < 64 { let tmp = ctx.alloc_tmp(types::I64).only_reg().unwrap(); ctx.emit(Inst::Extend { rd: tmp, rn, signed: true, from_bits: ty_bits_size as u8, to_bits: 64, }); tmp.to_reg() } else { rn }; // tmp = rn ^ ((signed)rn >> 63) let tmp = ctx.alloc_tmp(types::I64).only_reg().unwrap(); ctx.emit(Inst::ShiftRR { shift_op: ShiftOp::AShR64, rd: tmp, rn, shift_imm: SImm20::maybe_from_i64(63).unwrap(), shift_reg: None, }); ctx.emit(Inst::AluRRR { alu_op: ALUOp::Xor64, rd: tmp, rn: tmp.to_reg(), rm: rn, }); ctx.emit(Inst::Flogr { rn }); ctx.emit(Inst::gen_move(rd, gpr(0), ty)); if ty_bits_size < 64 { ctx.emit(Inst::AluRSImm16 { alu_op: ALUOp::Add32, rd, imm: -(64 - ty_bits_size as i16), }); } } Opcode::Ctz => { let rd = get_output_reg(ctx, outputs[0]).only_reg().unwrap(); let rn = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None); let ty = ty.unwrap(); let ty_bits_size = ty_bits(ty); let rn = if ty_bits_size < 64 { let tmp = ctx.alloc_tmp(types::I64).only_reg().unwrap(); ctx.emit(Inst::gen_move(tmp, rn, ty)); ctx.emit(Inst::AluRUImm16Shifted { alu_op: ALUOp::Orr64, rd: tmp, imm: UImm16Shifted::maybe_from_u64(1u64 << ty_bits_size).unwrap(), }); tmp.to_reg() } else { rn }; // tmp = rn & -rn let tmp = ctx.alloc_tmp(types::I64).only_reg().unwrap(); ctx.emit(Inst::UnaryRR { op: UnaryOp::Neg64, rd: tmp, rn, }); ctx.emit(Inst::AluRRR { alu_op: ALUOp::And64, rd: tmp, rn: tmp.to_reg(), rm: rn, }); ctx.emit(Inst::Flogr { rn: tmp.to_reg() }); if ty_bits_size == 64 { ctx.emit(Inst::CMov64SImm16 { rd: writable_gpr(0), cond: Cond::from_intcc(IntCC::Equal), imm: -1, }); } if ty_bits_size <= 32 { lower_constant_u32(ctx, rd, 63); } else { lower_constant_u64(ctx, rd, 63); } let alu_op = choose_32_64(ty, ALUOp::Sub32, ALUOp::Sub64); ctx.emit(Inst::AluRRR { alu_op, rd, rn: rd.to_reg(), rm: gpr(0), }); } Opcode::Bitrev => unimplemented!(), Opcode::Popcnt => { let ty = ty.unwrap(); let rd = get_output_reg(ctx, outputs[0]).only_reg().unwrap(); if ty_bits(ty) <= 8 { let rn = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None); 
ctx.emit(Inst::UnaryRR { op: UnaryOp::PopcntByte, rd, rn, }); } else if isa_flags.has_mie2() { let rn = put_input_in_reg(ctx, inputs[0], NarrowValueMode::ZeroExtend64); ctx.emit(Inst::UnaryRR { op: UnaryOp::PopcntReg, rd, rn, }); } else { let rn = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None); ctx.emit(Inst::UnaryRR { op: UnaryOp::PopcntByte, rd, rn, }); let tmp = ctx.alloc_tmp(types::I64).only_reg().unwrap(); let mut shift = ty_bits(ty) as u8; while shift > 8 { shift = shift / 2; ctx.emit(Inst::ShiftRR { shift_op: choose_32_64(ty, ShiftOp::LShL32, ShiftOp::LShL64), rd: tmp, rn: rd.to_reg(), shift_imm: SImm20::maybe_from_i64(shift.into()).unwrap(), shift_reg: None, }); ctx.emit(Inst::AluRR { alu_op: choose_32_64(ty, ALUOp::Add32, ALUOp::Add64), rd, rm: tmp.to_reg(), }); } let shift = ty_bits(ty) as u8 - 8; ctx.emit(Inst::ShiftRR { shift_op: choose_32_64(ty, ShiftOp::LShR32, ShiftOp::LShR64), rd, rn: rd.to_reg(), shift_imm: SImm20::maybe_from_i64(shift.into()).unwrap(), shift_reg: None, }); } } Opcode::Fadd | Opcode::Fsub | Opcode::Fmul | Opcode::Fdiv => { let bits = ty_bits(ctx.output_ty(insn, 0)); let fpu_op = match (op, bits) { (Opcode::Fadd, 32) => FPUOp2::Add32, (Opcode::Fadd, 64) => FPUOp2::Add64, (Opcode::Fsub, 32) => FPUOp2::Sub32, (Opcode::Fsub, 64) => FPUOp2::Sub64, (Opcode::Fmul, 32) => FPUOp2::Mul32, (Opcode::Fmul, 64) => FPUOp2::Mul64, (Opcode::Fdiv, 32) => FPUOp2::Div32, (Opcode::Fdiv, 64) => FPUOp2::Div64, _ => panic!("Unknown op/bits combination"), }; let rn = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None); let rm = put_input_in_reg(ctx, inputs[1], NarrowValueMode::None); let rd = get_output_reg(ctx, outputs[0]).only_reg().unwrap(); ctx.emit(Inst::mov64(rd, rn)); ctx.emit(Inst::FpuRRR { fpu_op, rd, rm }); } Opcode::Fmin | Opcode::Fmax => { let bits = ty_bits(ctx.output_ty(insn, 0)); let fpu_op = match (op, bits) { (Opcode::Fmin, 32) => FPUOp2::Min32, (Opcode::Fmin, 64) => FPUOp2::Min64, (Opcode::Fmax, 32) => FPUOp2::Max32, (Opcode::Fmax, 64) => FPUOp2::Max64, _ => panic!("Unknown op/bits combination"), }; let rn = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None); let rm = put_input_in_reg(ctx, inputs[1], NarrowValueMode::None); let rd = get_output_reg(ctx, outputs[0]).only_reg().unwrap(); ctx.emit(Inst::FpuVecRRR { fpu_op, rd, rn, rm }); } Opcode::Sqrt | Opcode::Fneg | Opcode::Fabs | Opcode::Fpromote | Opcode::Fdemote => { let bits = ty_bits(ctx.output_ty(insn, 0)); let fpu_op = match (op, bits) { (Opcode::Sqrt, 32) => FPUOp1::Sqrt32, (Opcode::Sqrt, 64) => FPUOp1::Sqrt64, (Opcode::Fneg, 32) => FPUOp1::Neg32, (Opcode::Fneg, 64) => FPUOp1::Neg64, (Opcode::Fabs, 32) => FPUOp1::Abs32, (Opcode::Fabs, 64) => FPUOp1::Abs64, (Opcode::Fpromote, 32) => panic!("Cannot promote to 32 bits"), (Opcode::Fpromote, 64) => FPUOp1::Cvt32To64, (Opcode::Fdemote, 32) => FPUOp1::Cvt64To32, (Opcode::Fdemote, 64) => panic!("Cannot demote to 64 bits"), _ => panic!("Unknown op/bits combination"), }; let rn = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None); let rd = get_output_reg(ctx, outputs[0]).only_reg().unwrap(); ctx.emit(Inst::FpuRR { fpu_op, rd, rn }); } Opcode::Ceil | Opcode::Floor | Opcode::Trunc | Opcode::Nearest => { let bits = ty_bits(ctx.output_ty(insn, 0)); let op = match (op, bits) { (Opcode::Ceil, 32) => FpuRoundMode::Plus32, (Opcode::Ceil, 64) => FpuRoundMode::Plus64, (Opcode::Floor, 32) => FpuRoundMode::Minus32, (Opcode::Floor, 64) => FpuRoundMode::Minus64, (Opcode::Trunc, 32) => FpuRoundMode::Zero32, (Opcode::Trunc, 64) => FpuRoundMode::Zero64, 
(Opcode::Nearest, 32) => FpuRoundMode::Nearest32, (Opcode::Nearest, 64) => FpuRoundMode::Nearest64, _ => panic!("Unknown op/bits combination"), }; let rn = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None); let rd = get_output_reg(ctx, outputs[0]).only_reg().unwrap(); ctx.emit(Inst::FpuRound { op, rd, rn }); } Opcode::Fma => { let bits = ty_bits(ctx.output_ty(insn, 0)); let fpu_op = match bits { 32 => FPUOp3::MAdd32, 64 => FPUOp3::MAdd64, _ => panic!("Unknown op size"), }; let rn = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None); let rm = put_input_in_reg(ctx, inputs[1], NarrowValueMode::None); let ra = put_input_in_reg(ctx, inputs[2], NarrowValueMode::None); let rd = get_output_reg(ctx, outputs[0]).only_reg().unwrap(); ctx.emit(Inst::mov64(rd, ra)); ctx.emit(Inst::FpuRRRR { fpu_op, rd, rn, rm }); } Opcode::Fcopysign => { let ty = ctx.output_ty(insn, 0); let bits = ty_bits(ty) as u8; assert!(bits == 32 || bits == 64); let rn = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None); let rm = put_input_in_reg(ctx, inputs[1], NarrowValueMode::None); let rd = get_output_reg(ctx, outputs[0]).only_reg().unwrap(); ctx.emit(Inst::FpuCopysign { rd, rn, rm }); } Opcode::FcvtFromUint | Opcode::FcvtFromSint => { let in_bits = ty_bits(ctx.input_ty(insn, 0)); let out_bits = ty_bits(ctx.output_ty(insn, 0)); let signed = op == Opcode::FcvtFromSint; let op = match (signed, in_bits, out_bits) { (false, 32, 32) => IntToFpuOp::U32ToF32, (true, 32, 32) => IntToFpuOp::I32ToF32, (false, 32, 64) => IntToFpuOp::U32ToF64, (true, 32, 64) => IntToFpuOp::I32ToF64, (false, 64, 32) => IntToFpuOp::U64ToF32, (true, 64, 32) => IntToFpuOp::I64ToF32, (false, 64, 64) => IntToFpuOp::U64ToF64, (true, 64, 64) => IntToFpuOp::I64ToF64, _ => panic!("Unknown input/output-bits combination"), }; let narrow_mode = match (signed, in_bits) { (false, 32) => NarrowValueMode::ZeroExtend32, (true, 32) => NarrowValueMode::SignExtend32, (false, 64) => NarrowValueMode::ZeroExtend64, (true, 64) => NarrowValueMode::SignExtend64, _ => panic!("Unknown input size"), }; let rn = put_input_in_reg(ctx, inputs[0], narrow_mode); let rd = get_output_reg(ctx, outputs[0]).only_reg().unwrap(); ctx.emit(Inst::IntToFpu { op, rd, rn }); } Opcode::FcvtToUint | Opcode::FcvtToSint => { let in_bits = ty_bits(ctx.input_ty(insn, 0)); let out_bits = ty_bits(ctx.output_ty(insn, 0)); let signed = op == Opcode::FcvtToSint; let op = match (signed, in_bits, out_bits) { (false, 32, 32) => FpuToIntOp::F32ToU32, (true, 32, 32) => FpuToIntOp::F32ToI32, (false, 32, 64) => FpuToIntOp::F32ToU64, (true, 32, 64) => FpuToIntOp::F32ToI64, (false, 64, 32) => FpuToIntOp::F64ToU32, (true, 64, 32) => FpuToIntOp::F64ToI32, (false, 64, 64) => FpuToIntOp::F64ToU64, (true, 64, 64) => FpuToIntOp::F64ToI64, _ => panic!("Unknown input/output-bits combination"), }; let rn = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None); let rd = get_output_reg(ctx, outputs[0]).only_reg().unwrap(); // First, check whether the input is a NaN and trap if so. if in_bits == 32 { ctx.emit(Inst::FpuCmp32 { rn, rm: rn }); } else { ctx.emit(Inst::FpuCmp64 { rn, rm: rn }); } ctx.emit(Inst::TrapIf { trap_code: TrapCode::BadConversionToInteger, cond: Cond::from_floatcc(FloatCC::Unordered), }); // Perform the conversion. If this sets CC 3, we have a // "special case". Since we already exluded the case where // the input was a NaN, the only other option is that the // conversion overflowed the target type. 
ctx.emit(Inst::FpuToInt { op, rd, rn }); ctx.emit(Inst::TrapIf { trap_code: TrapCode::IntegerOverflow, cond: Cond::from_floatcc(FloatCC::Unordered), }); } Opcode::FcvtToUintSat | Opcode::FcvtToSintSat => { let in_bits = ty_bits(ctx.input_ty(insn, 0)); let out_bits = ty_bits(ctx.output_ty(insn, 0)); let signed = op == Opcode::FcvtToSintSat; let op = match (signed, in_bits, out_bits) { (false, 32, 32) => FpuToIntOp::F32ToU32, (true, 32, 32) => FpuToIntOp::F32ToI32, (false, 32, 64) => FpuToIntOp::F32ToU64, (true, 32, 64) => FpuToIntOp::F32ToI64, (false, 64, 32) => FpuToIntOp::F64ToU32, (true, 64, 32) => FpuToIntOp::F64ToI32, (false, 64, 64) => FpuToIntOp::F64ToU64, (true, 64, 64) => FpuToIntOp::F64ToI64, _ => panic!("Unknown input/output-bits combination"), }; let rn = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None); let rd = get_output_reg(ctx, outputs[0]).only_reg().unwrap(); // Perform the conversion. ctx.emit(Inst::FpuToInt { op, rd, rn }); // In most special cases, the Z instruction already yields the // result expected by Cranelift semantics. The only exception // is the case where the input was a NaN. We explicitly check // for that and force the output to 0 in that case. if in_bits == 32 { ctx.emit(Inst::FpuCmp32 { rn, rm: rn }); } else { ctx.emit(Inst::FpuCmp64 { rn, rm: rn }); } let cond = Cond::from_floatcc(FloatCC::Unordered); if out_bits <= 32 { ctx.emit(Inst::CMov32SImm16 { rd, cond, imm: 0 }); } else { ctx.emit(Inst::CMov64SImm16 { rd, cond, imm: 0 }); } } Opcode::FcvtLowFromSint => unimplemented!("FcvtLowFromSint"), Opcode::Bitcast => { let rd = get_output_reg(ctx, outputs[0]).only_reg().unwrap(); let rn = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None); let input_ty = ctx.input_ty(insn, 0); let output_ty = ctx.output_ty(insn, 0); lower_bitcast(ctx, rd, output_ty, rn, input_ty); } Opcode::Load | Opcode::Uload8 | Opcode::Sload8 | Opcode::Uload16 | Opcode::Sload16 | Opcode::Uload32 | Opcode::Sload32 | Opcode::LoadComplex | Opcode::Uload8Complex | Opcode::Sload8Complex | Opcode::Uload16Complex | Opcode::Sload16Complex | Opcode::Uload32Complex | Opcode::Sload32Complex => { let off = ctx.data(insn).load_store_offset().unwrap(); let flags = ctx.memflags(insn).unwrap(); let endianness = flags.endianness(Endianness::Big); let elem_ty = ctx.output_ty(insn, 0); let is_float = ty_is_float(elem_ty); let to_bits = ty_bits(elem_ty); let from_bits = match op { Opcode::Load | Opcode::LoadComplex => to_bits, Opcode::Sload8 | Opcode::Uload8 | Opcode::Sload8Complex | Opcode::Uload8Complex => { 8 } Opcode::Sload16 | Opcode::Uload16 | Opcode::Sload16Complex | Opcode::Uload16Complex => 16, Opcode::Sload32 | Opcode::Uload32 | Opcode::Sload32Complex | Opcode::Uload32Complex => 32, _ => unreachable!(), }; let ext_bits = if to_bits < 32 { 32 } else { to_bits }; let sign_extend = match op { Opcode::Sload8 | Opcode::Sload8Complex | Opcode::Sload16 | Opcode::Sload16Complex | Opcode::Sload32 | Opcode::Sload32Complex => true, _ => false, }; let mem = lower_address(ctx, &inputs[..], off, flags); let rd = get_output_reg(ctx, outputs[0]).only_reg().unwrap(); if endianness == Endianness::Big { ctx.emit(match (ext_bits, from_bits, sign_extend, is_float) { (32, 32, _, true) => Inst::FpuLoad32 { rd, mem }, (64, 64, _, true) => Inst::FpuLoad64 { rd, mem }, (32, 32, _, false) => Inst::Load32 { rd, mem }, (64, 64, _, false) => Inst::Load64 { rd, mem }, (32, 8, false, _) => Inst::Load32ZExt8 { rd, mem }, (32, 8, true, _) => Inst::Load32SExt8 { rd, mem }, (32, 16, false, _) => Inst::Load32ZExt16 { rd,
mem }, (32, 16, true, _) => Inst::Load32SExt16 { rd, mem }, (64, 8, false, _) => Inst::Load64ZExt8 { rd, mem }, (64, 8, true, _) => Inst::Load64SExt8 { rd, mem }, (64, 16, false, _) => Inst::Load64ZExt16 { rd, mem }, (64, 16, true, _) => Inst::Load64SExt16 { rd, mem }, (64, 32, false, _) => Inst::Load64ZExt32 { rd, mem }, (64, 32, true, _) => Inst::Load64SExt32 { rd, mem }, _ => panic!("Unsupported size in load"), }); } else if !is_float { ctx.emit(match (ext_bits, from_bits, sign_extend) { (_, 16, _) => Inst::LoadRev16 { rd, mem }, (_, 32, _) => Inst::LoadRev32 { rd, mem }, (_, 64, _) => Inst::LoadRev64 { rd, mem }, (32, 8, false) => Inst::Load32ZExt8 { rd, mem }, (32, 8, true) => Inst::Load32SExt8 { rd, mem }, (64, 8, false) => Inst::Load64ZExt8 { rd, mem }, (64, 8, true) => Inst::Load64SExt8 { rd, mem }, _ => panic!("Unsupported size in load"), }); if to_bits > from_bits && from_bits > 8 { ctx.emit(Inst::Extend { rd, rn: rd.to_reg(), signed: sign_extend, from_bits: from_bits as u8, to_bits: to_bits as u8, }); } } else if isa_flags.has_vxrs_ext2() { ctx.emit(match from_bits { 32 => Inst::FpuLoadRev32 { rd, mem }, 64 => Inst::FpuLoadRev64 { rd, mem }, _ => panic!("Unsupported size in load"), }); } else { match from_bits { 32 => { let tmp = ctx.alloc_tmp(types::I32).only_reg().unwrap(); ctx.emit(Inst::LoadRev32 { rd: tmp, mem }); lower_bitcast(ctx, rd, elem_ty, tmp.to_reg(), types::I32); } 64 => { let tmp = ctx.alloc_tmp(types::I64).only_reg().unwrap(); ctx.emit(Inst::LoadRev64 { rd: tmp, mem }); lower_bitcast(ctx, rd, elem_ty, tmp.to_reg(), types::I64); } _ => panic!("Unsupported size in load"), } } } Opcode::Store | Opcode::Istore8 | Opcode::Istore16 | Opcode::Istore32 | Opcode::StoreComplex | Opcode::Istore8Complex | Opcode::Istore16Complex | Opcode::Istore32Complex => { let off = ctx.data(insn).load_store_offset().unwrap(); let flags = ctx.memflags(insn).unwrap(); let endianness = flags.endianness(Endianness::Big); let elem_ty = match op { Opcode::Istore8 | Opcode::Istore8Complex => types::I8, Opcode::Istore16 | Opcode::Istore16Complex => types::I16, Opcode::Istore32 | Opcode::Istore32Complex => types::I32, Opcode::Store | Opcode::StoreComplex => ctx.input_ty(insn, 0), _ => unreachable!(), }; let mem = lower_address(ctx, &inputs[1..], off, flags); if ty_is_float(elem_ty) { let rd = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None); if endianness == Endianness::Big { ctx.emit(match ty_bits(elem_ty) { 32 => Inst::FpuStore32 { rd, mem }, 64 => Inst::FpuStore64 { rd, mem }, _ => panic!("Unsupported size in store"), }); } else if isa_flags.has_vxrs_ext2() { ctx.emit(match ty_bits(elem_ty) { 32 => Inst::FpuStoreRev32 { rd, mem }, 64 => Inst::FpuStoreRev64 { rd, mem }, _ => panic!("Unsupported size in store"), }); } else { match ty_bits(elem_ty) { 32 => { let tmp = ctx.alloc_tmp(types::I32).only_reg().unwrap(); lower_bitcast(ctx, tmp, types::I32, rd, elem_ty); ctx.emit(Inst::StoreRev32 { rd: tmp.to_reg(), mem, }); } 64 => { let tmp = ctx.alloc_tmp(types::I64).only_reg().unwrap(); lower_bitcast(ctx, tmp, types::I64, rd, elem_ty); ctx.emit(Inst::StoreRev64 { rd: tmp.to_reg(), mem, }); } _ => panic!("Unsupported size in store"), } } } else if ty_bits(elem_ty) <= 16 { if let Some(imm) = input_matches_const(ctx, inputs[0]) { ctx.emit(match (endianness, ty_bits(elem_ty)) { (_, 1) | (_, 8) => Inst::StoreImm8 { imm: imm as u8, mem, }, (Endianness::Big, 16) => Inst::StoreImm16 { imm: imm as i16, mem, }, (Endianness::Little, 16) => Inst::StoreImm16 { imm: (imm as i16).swap_bytes(), mem, }, _ =>
panic!("Unsupported size in store"), }); } else { let rd = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None); ctx.emit(match (endianness, ty_bits(elem_ty)) { (_, 1) | (_, 8) => Inst::Store8 { rd, mem }, (Endianness::Big, 16) => Inst::Store16 { rd, mem }, (Endianness::Little, 16) => Inst::StoreRev16 { rd, mem }, _ => panic!("Unsupported size in store"), }); } } else if endianness == Endianness::Big { if let Some(imm) = input_matches_simm16(ctx, inputs[0]) { ctx.emit(match ty_bits(elem_ty) { 32 => Inst::StoreImm32SExt16 { imm, mem }, 64 => Inst::StoreImm64SExt16 { imm, mem }, _ => panic!("Unsupported size in store"), }); } else { let rd = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None); ctx.emit(match ty_bits(elem_ty) { 32 => Inst::Store32 { rd, mem }, 64 => Inst::Store64 { rd, mem }, _ => panic!("Unsupported size in store"), }); } } else { let rd = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None); ctx.emit(match ty_bits(elem_ty) { 32 => Inst::StoreRev32 { rd, mem }, 64 => Inst::StoreRev64 { rd, mem }, _ => panic!("Unsupported size in store"), }); } } Opcode::StackLoad | Opcode::StackStore => { panic!("Direct stack memory access not supported; should not be used by Wasm"); } Opcode::StackAddr => { let (stack_slot, offset) = match *ctx.data(insn) { InstructionData::StackLoad { opcode: Opcode::StackAddr, stack_slot, offset, } => (stack_slot, offset), _ => unreachable!(), }; let rd = get_output_reg(ctx, outputs[0]).only_reg().unwrap(); let offset: i32 = offset.into(); let inst = ctx .abi() .stackslot_addr(stack_slot, u32::try_from(offset).unwrap(), rd); ctx.emit(inst); } Opcode::ConstAddr => unimplemented!(), Opcode::FuncAddr => { let rd = get_output_reg(ctx, outputs[0]).only_reg().unwrap(); let (extname, dist) = ctx.call_target(insn).unwrap(); let extname = extname.clone(); if dist == RelocDistance::Near { ctx.emit(Inst::LoadAddr { rd, mem: MemArg::Symbol { name: Box::new(extname), offset: 0, flags: MemFlags::trusted(), }, }); } else { ctx.emit(Inst::LoadExtNameFar { rd, name: Box::new(extname), offset: 0, }); } } Opcode::SymbolValue => { let rd = get_output_reg(ctx, outputs[0]).only_reg().unwrap(); let (extname, dist, offset) = ctx.symbol_value(insn).unwrap(); let extname = extname.clone(); if dist == RelocDistance::Near && (offset & 1) == 0 && i32::try_from(offset).is_ok() { ctx.emit(Inst::LoadAddr { rd, mem: MemArg::Symbol { name: Box::new(extname), offset: i32::try_from(offset).unwrap(), flags: MemFlags::trusted(), }, }); } else { ctx.emit(Inst::LoadExtNameFar { rd, name: Box::new(extname), offset, }); } } Opcode::HeapAddr => { panic!("heap_addr should have been removed by legalization!"); } Opcode::TableAddr => { panic!("table_addr should have been removed by legalization!"); } Opcode::GlobalValue => { panic!("global_value should have been removed by legalization!"); } Opcode::TlsValue => { unimplemented!("Thread-local storage support not implemented!"); } Opcode::GetPinnedReg | Opcode::SetPinnedReg => { unimplemented!("Pinned register support not implemented!"); } Opcode::Icmp => { let condcode = ctx.data(insn).cond_code().unwrap(); let cond = Cond::from_intcc(condcode); let is_signed = condcode_is_signed(condcode); lower_icmp_to_flags(ctx, insn, is_signed, true); let rd = get_output_reg(ctx, outputs[0]).only_reg().unwrap(); let ty = ctx.output_ty(insn, 0); lower_flags_to_bool_result(ctx, cond, rd, ty); } Opcode::Fcmp => { let condcode = ctx.data(insn).fp_cond_code().unwrap(); let cond = Cond::from_floatcc(condcode); lower_fcmp_to_flags(ctx, insn); let rd = 
get_output_reg(ctx, outputs[0]).only_reg().unwrap(); let ty = ctx.output_ty(insn, 0); lower_flags_to_bool_result(ctx, cond, rd, ty); } Opcode::IsNull | Opcode::IsInvalid => { // Null references are represented by the constant value 0; invalid // references are represented by the constant value -1. let cond = Cond::from_intcc(IntCC::Equal); let imm = match op { Opcode::IsNull => 0, Opcode::IsInvalid => -1, _ => unreachable!(), }; let rn = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None); ctx.emit(Inst::CmpRSImm16 { op: CmpOp::CmpS64, rn, imm, }); let rd = get_output_reg(ctx, outputs[0]).only_reg().unwrap(); let ty = ctx.output_ty(insn, 0); lower_flags_to_bool_result(ctx, cond, rd, ty); } Opcode::Select => { let ty = ctx.output_ty(insn, 0); let rd = get_output_reg(ctx, outputs[0]).only_reg().unwrap(); let rn = put_input_in_reg(ctx, inputs[1], NarrowValueMode::None); let rm = put_input_in_reg(ctx, inputs[2], NarrowValueMode::None); let cond = lower_boolean_to_flags(ctx, inputs[0]); ctx.emit(Inst::gen_move(rd, rm, ty)); if ty_is_float(ty) { if ty_bits(ty) < 64 { ctx.emit(Inst::FpuCMov32 { rd, cond, rm: rn }); } else { ctx.emit(Inst::FpuCMov64 { rd, cond, rm: rn }); } } else { if ty_bits(ty) < 64 { ctx.emit(Inst::CMov32 { rd, cond, rm: rn }); } else { ctx.emit(Inst::CMov64 { rd, cond, rm: rn }); } } } Opcode::SelectifSpectreGuard => { let ty = ctx.output_ty(insn, 0); let rd = get_output_reg(ctx, outputs[0]).only_reg().unwrap(); let rn = put_input_in_reg(ctx, inputs[1], NarrowValueMode::None); let rm = put_input_in_reg(ctx, inputs[2], NarrowValueMode::None); let condcode = ctx.data(insn).cond_code().unwrap(); let cond = Cond::from_intcc(condcode); let is_signed = condcode_is_signed(condcode); // Verification ensures that the input is always a single-def ifcmp. let cmp_insn = ctx .get_input_as_source_or_const(inputs[0].insn, inputs[0].input) .inst .unwrap() .0; debug_assert_eq!(ctx.data(cmp_insn).opcode(), Opcode::Ifcmp); lower_icmp_to_flags(ctx, cmp_insn, is_signed, true); ctx.emit(Inst::gen_move(rd, rm, ty)); if ty_is_float(ty) { if ty_bits(ty) < 64 { ctx.emit(Inst::FpuCMov32 { rd, cond, rm: rn }); } else { ctx.emit(Inst::FpuCMov64 { rd, cond, rm: rn }); } } else { if ty_bits(ty) < 64 { ctx.emit(Inst::CMov32 { rd, cond, rm: rn }); } else { ctx.emit(Inst::CMov64 { rd, cond, rm: rn }); } } } Opcode::Trap | Opcode::ResumableTrap => { let trap_code = ctx.data(insn).trap_code().unwrap(); ctx.emit_safepoint(Inst::Trap { trap_code }) } Opcode::Trapz | Opcode::Trapnz | Opcode::ResumableTrapnz => { let cond = lower_boolean_to_flags(ctx, inputs[0]); let negated = op == Opcode::Trapz; let cond = if negated { cond.invert() } else { cond }; let trap_code = ctx.data(insn).trap_code().unwrap(); ctx.emit_safepoint(Inst::TrapIf { trap_code, cond }); } Opcode::Trapif => { let condcode = ctx.data(insn).cond_code().unwrap(); let mut cond = Cond::from_intcc(condcode); let is_signed = condcode_is_signed(condcode); let cmp_insn = ctx .get_input_as_source_or_const(inputs[0].insn, inputs[0].input) .inst .unwrap() .0; if ctx.data(cmp_insn).opcode() == Opcode::IaddIfcout { // The flags must not have been clobbered by any other instruction between the // iadd_ifcout and this instruction, as verified by the CLIF validator; so we // can simply rely on the condition code here. 
// // IaddIfcout is implemented via an ADD LOGICAL instruction, which sets the // condition code as follows: // 0 Result zero; no carry // 1 Result not zero; no carry // 2 Result zero; carry // 3 Result not zero; carry // This means "carry" corresponds to condition code 2 or 3, i.e. // a condition mask of 2 | 1. // // As this does not match any of the encodings used with a normal integer // comparison, this cannot be represented by any IntCC value. We need to // remap the IntCC::UnsignedGreaterThan value that we have here as the result // of the unsigned_add_overflow_condition call to the correct mask. assert!(condcode == IntCC::UnsignedGreaterThan); cond = Cond::from_mask(2 | 1); } else { // Verification ensures that the input is always a single-def ifcmp. debug_assert_eq!(ctx.data(cmp_insn).opcode(), Opcode::Ifcmp); lower_icmp_to_flags(ctx, cmp_insn, is_signed, true); } let trap_code = ctx.data(insn).trap_code().unwrap(); ctx.emit_safepoint(Inst::TrapIf { trap_code, cond }); } Opcode::Debugtrap => { ctx.emit(Inst::Debugtrap); } Opcode::Call | Opcode::CallIndirect => { let caller_conv = ctx.abi().call_conv(); let (mut abi, inputs) = match op { Opcode::Call => { let (extname, dist) = ctx.call_target(insn).unwrap(); let extname = extname.clone(); let sig = ctx.call_sig(insn).unwrap(); assert!(inputs.len() == sig.params.len()); assert!(outputs.len() == sig.returns.len()); ( S390xABICaller::from_func(sig, &extname, dist, caller_conv, flags)?, &inputs[..], ) } Opcode::CallIndirect => { let ptr = put_input_in_reg(ctx, inputs[0], NarrowValueMode::ZeroExtend64); let sig = ctx.call_sig(insn).unwrap(); assert!(inputs.len() - 1 == sig.params.len()); assert!(outputs.len() == sig.returns.len()); ( S390xABICaller::from_ptr(sig, ptr, op, caller_conv, flags)?, &inputs[1..], ) } _ => unreachable!(), }; assert!(inputs.len() == abi.num_args()); for (i, input) in inputs.iter().enumerate() { let arg_reg = put_input_in_reg(ctx, *input, NarrowValueMode::None); abi.emit_copy_regs_to_arg(ctx, i, ValueRegs::one(arg_reg)); } abi.emit_call(ctx); for (i, output) in outputs.iter().enumerate() { let retval_reg = get_output_reg(ctx, *output).only_reg().unwrap(); abi.emit_copy_retval_to_regs(ctx, i, ValueRegs::one(retval_reg)); } abi.accumulate_outgoing_args_size(ctx); } Opcode::FallthroughReturn | Opcode::Return => { for (i, input) in inputs.iter().enumerate() { let reg = put_input_in_reg(ctx, *input, NarrowValueMode::None); let retval_reg = ctx.retval(i).only_reg().unwrap(); let ty = ctx.input_ty(insn, i); ctx.emit(Inst::gen_move(retval_reg, reg, ty)); } // N.B.: the Ret itself is generated by the ABI.
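// This arm only moves each return value into its ABI-designated return
// register; the ABI epilogue emits the actual return instruction.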
} Opcode::AtomicRmw => { let rd = get_output_reg(ctx, outputs[0]).only_reg().unwrap(); let addr = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None); let rn = put_input_in_reg(ctx, inputs[1], NarrowValueMode::None); let flags = ctx.memflags(insn).unwrap(); let endianness = flags.endianness(Endianness::Big); let ty = ty.unwrap(); assert!(is_valid_atomic_transaction_ty(ty)); if endianness == Endianness::Little { unimplemented!("Little-endian atomic operations not implemented"); } if ty_bits(ty) < 32 { unimplemented!("Sub-word atomic operations not implemented"); } let op = inst_common::AtomicRmwOp::from(ctx.data(insn).atomic_rmw_op().unwrap()); let (alu_op, rn) = match op { AtomicRmwOp::And => (choose_32_64(ty, ALUOp::And32, ALUOp::And64), rn), AtomicRmwOp::Or => (choose_32_64(ty, ALUOp::Orr32, ALUOp::Orr64), rn), AtomicRmwOp::Xor => (choose_32_64(ty, ALUOp::Xor32, ALUOp::Xor64), rn), AtomicRmwOp::Add => (choose_32_64(ty, ALUOp::Add32, ALUOp::Add64), rn), AtomicRmwOp::Sub => { let tmp_ty = choose_32_64(ty, types::I32, types::I64); let tmp = ctx.alloc_tmp(tmp_ty).only_reg().unwrap(); let neg_op = choose_32_64(ty, UnaryOp::Neg32, UnaryOp::Neg64); ctx.emit(Inst::UnaryRR { op: neg_op, rd: tmp, rn, }); (choose_32_64(ty, ALUOp::Add32, ALUOp::Add64), tmp.to_reg()) } _ => unimplemented!("AtomicRmw operation type {:?} not implemented", op), }; let mem = MemArg::reg(addr, flags); ctx.emit(Inst::AtomicRmw { alu_op, rd, rn, mem, }); } Opcode::AtomicCas => { let rd = get_output_reg(ctx, outputs[0]).only_reg().unwrap(); let addr = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None); let rm = put_input_in_reg(ctx, inputs[1], NarrowValueMode::None); let rn = put_input_in_reg(ctx, inputs[2], NarrowValueMode::None); let flags = ctx.memflags(insn).unwrap(); let endianness = flags.endianness(Endianness::Big); let ty = ty.unwrap(); assert!(is_valid_atomic_transaction_ty(ty)); if endianness == Endianness::Little { unimplemented!("Little-endian atomic operations not implemented"); } if ty_bits(ty) < 32 { unimplemented!("Sub-word atomic operations not implemented"); } let mem = MemArg::reg(addr, flags); ctx.emit(Inst::gen_move(rd, rm, ty)); if ty_bits(ty) == 32 { ctx.emit(Inst::AtomicCas32 { rd, rn, mem }); } else { ctx.emit(Inst::AtomicCas64 { rd, rn, mem }); } } Opcode::AtomicLoad => { let flags = ctx.memflags(insn).unwrap(); let endianness = flags.endianness(Endianness::Big); let ty = ty.unwrap(); assert!(is_valid_atomic_transaction_ty(ty)); let mem = lower_address(ctx, &inputs[..], 0, flags); let rd = get_output_reg(ctx, outputs[0]).only_reg().unwrap(); if endianness == Endianness::Big { ctx.emit(match ty_bits(ty) { 8 => Inst::Load32ZExt8 { rd, mem }, 16 => Inst::Load32ZExt16 { rd, mem }, 32 => Inst::Load32 { rd, mem }, 64 => Inst::Load64 { rd, mem }, _ => panic!("Unsupported size in load"), }); } else { ctx.emit(match ty_bits(ty) { 8 => Inst::Load32ZExt8 { rd, mem }, 16 => Inst::LoadRev16 { rd, mem }, 32 => Inst::LoadRev32 { rd, mem }, 64 => Inst::LoadRev64 { rd, mem }, _ => panic!("Unsupported size in load"), }); } } Opcode::AtomicStore => { let flags = ctx.memflags(insn).unwrap(); let endianness = flags.endianness(Endianness::Big); let ty = ctx.input_ty(insn, 0); assert!(is_valid_atomic_transaction_ty(ty)); let mem = lower_address(ctx, &inputs[1..], 0, flags); if ty_bits(ty) <= 16 { if let Some(imm) = input_matches_const(ctx, inputs[0]) { ctx.emit(match (endianness, ty_bits(ty)) { (_, 8) => Inst::StoreImm8 { imm: imm as u8, mem, }, (Endianness::Big, 16) => Inst::StoreImm16 { imm: imm as i16, mem, }, 
(Endianness::Little, 16) => Inst::StoreImm16 { imm: (imm as i16).swap_bytes(), mem, }, _ => panic!("Unsupported size in store"), }); } else { let rd = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None); ctx.emit(match (endianness, ty_bits(ty)) { (_, 8) => Inst::Store8 { rd, mem }, (Endianness::Big, 16) => Inst::Store16 { rd, mem }, (Endianness::Little, 16) => Inst::StoreRev16 { rd, mem }, _ => panic!("Unsupported size in store"), }); } } else if endianness == Endianness::Big { if let Some(imm) = input_matches_simm16(ctx, inputs[0]) { ctx.emit(match ty_bits(ty) { 32 => Inst::StoreImm32SExt16 { imm, mem }, 64 => Inst::StoreImm64SExt16 { imm, mem }, _ => panic!("Unsupported size in store"), }); } else { let rd = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None); ctx.emit(match ty_bits(ty) { 32 => Inst::Store32 { rd, mem }, 64 => Inst::Store64 { rd, mem }, _ => panic!("Unsupported size in store"), }); } } else { let rd = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None); ctx.emit(match ty_bits(ty) { 32 => Inst::StoreRev32 { rd, mem }, 64 => Inst::StoreRev64 { rd, mem }, _ => panic!("Unsupported size in store"), }); } ctx.emit(Inst::Fence); } Opcode::Fence => { ctx.emit(Inst::Fence); } Opcode::RawBitcast | Opcode::Splat | Opcode::Swizzle | Opcode::Insertlane | Opcode::Extractlane | Opcode::Imin | Opcode::Umin | Opcode::Imax | Opcode::Umax | Opcode::AvgRound | Opcode::FminPseudo | Opcode::FmaxPseudo | Opcode::Uload8x8 | Opcode::Uload8x8Complex | Opcode::Sload8x8 | Opcode::Sload8x8Complex | Opcode::Uload16x4 | Opcode::Uload16x4Complex | Opcode::Sload16x4 | Opcode::Sload16x4Complex | Opcode::Uload32x2 | Opcode::Uload32x2Complex | Opcode::Sload32x2 | Opcode::Sload32x2Complex | Opcode::Vconst | Opcode::Shuffle | Opcode::Vsplit | Opcode::Vconcat | Opcode::Vselect | Opcode::VanyTrue | Opcode::VallTrue | Opcode::VhighBits | Opcode::ScalarToVector | Opcode::Snarrow | Opcode::Unarrow | Opcode::Uunarrow | Opcode::SwidenLow | Opcode::SwidenHigh | Opcode::UwidenLow | Opcode::UwidenHigh | Opcode::WideningPairwiseDotProductS | Opcode::SqmulRoundSat | Opcode::FvpromoteLow | Opcode::Fvdemote | Opcode::IaddPairwise => { // TODO unimplemented!("Vector ops not implemented."); } Opcode::Isplit | Opcode::Iconcat => unimplemented!("Wide integer ops not implemented."), Opcode::Spill | Opcode::Fill | Opcode::FillNop | Opcode::CopyNop | Opcode::AdjustSpDown | Opcode::AdjustSpUpImm | Opcode::AdjustSpDownImm | Opcode::IfcmpSp => { panic!("Unused opcode should not be encountered."); } Opcode::Ifcmp | Opcode::Ffcmp | Opcode::Trapff | Opcode::Trueif | Opcode::Trueff | Opcode::Selectif => { panic!("Flags opcode should not be encountered."); } Opcode::Jump | Opcode::Brz | Opcode::Brnz | Opcode::BrIcmp | Opcode::Brif | Opcode::Brff | Opcode::BrTable => { panic!("Branch opcode reached non-branch lowering logic!"); } Opcode::Safepoint => { panic!("safepoint instructions not used by new backend's safepoints!"); } Opcode::IaddImm | Opcode::ImulImm | Opcode::UdivImm | Opcode::SdivImm | Opcode::UremImm | Opcode::SremImm | Opcode::IrsubImm | Opcode::IaddCin | Opcode::IaddIfcin | Opcode::IaddCout | Opcode::IaddCarry | Opcode::IaddIfcarry | Opcode::IsubBin | Opcode::IsubIfbin | Opcode::IsubBout | Opcode::IsubIfbout | Opcode::IsubBorrow | Opcode::IsubIfborrow | Opcode::BandImm | Opcode::BorImm | Opcode::BxorImm | Opcode::RotlImm | Opcode::RotrImm | Opcode::IshlImm | Opcode::UshrImm | Opcode::SshrImm | Opcode::IcmpImm | Opcode::IfcmpImm => { panic!("ALU+imm and ALU+carry ops should not appear here!"); } } Ok(()) } 
//============================================================================ // Lowering: main entry point for lowering a branch group fn lower_branch<C: LowerCtx<I = Inst>>( ctx: &mut C, branches: &[IRInst], targets: &[MachLabel], ) -> CodegenResult<()> { // A block should end with at most two branches. The first may be a // conditional branch; a conditional branch can be followed only by an // unconditional branch or fallthrough. Otherwise, if only one branch, // it may be an unconditional branch, a fallthrough, a return, or a // trap. These conditions are verified by `is_ebb_basic()` during the // verifier pass. assert!(branches.len() <= 2); if branches.len() == 2 { // Must be a conditional branch followed by an unconditional branch. let op0 = ctx.data(branches[0]).opcode(); let op1 = ctx.data(branches[1]).opcode(); assert!(op1 == Opcode::Jump); let taken = BranchTarget::Label(targets[0]); let not_taken = BranchTarget::Label(targets[1]); match op0 { Opcode::Brz | Opcode::Brnz => { let flag_input = InsnInput { insn: branches[0], input: 0, }; let cond = lower_boolean_to_flags(ctx, flag_input); let negated = op0 == Opcode::Brz; let cond = if negated { cond.invert() } else { cond }; ctx.emit(Inst::CondBr { taken, not_taken, cond, }); } Opcode::Brif => { let condcode = ctx.data(branches[0]).cond_code().unwrap(); let cond = Cond::from_intcc(condcode); let is_signed = condcode_is_signed(condcode); // Verification ensures that the input is always a single-def ifcmp. let cmp_insn = ctx .get_input_as_source_or_const(branches[0], 0) .inst .unwrap() .0; debug_assert_eq!(ctx.data(cmp_insn).opcode(), Opcode::Ifcmp); lower_icmp_to_flags(ctx, cmp_insn, is_signed, true); ctx.emit(Inst::CondBr { taken, not_taken, cond, }); } Opcode::Brff => unreachable!(), _ => unimplemented!(), } } else { // Must be an unconditional branch or an indirect branch. let op = ctx.data(branches[0]).opcode(); match op { Opcode::Jump => { assert!(branches.len() == 1); ctx.emit(Inst::Jump { dest: BranchTarget::Label(targets[0]), }); } Opcode::BrTable => { let jt_size = targets.len() - 1; assert!(jt_size <= std::u32::MAX as usize); // Load up jump table element index. let ridx = put_input_in_reg( ctx, InsnInput { insn: branches[0], input: 0, }, NarrowValueMode::ZeroExtend64, ); // Temp registers needed by the compound instruction. let rtmp1 = ctx.alloc_tmp(types::I64).only_reg().unwrap(); let rtmp2 = ctx.alloc_tmp(types::I64).only_reg().unwrap(); // Emit the compound instruction that does: // // clgfi %rIdx, <jt-size> // jghe <default-target> // sllg %rTmp2, %rIdx, 2 // larl %rTmp1, <jt-base> // lgf %rTmp2, 0(%rTmp2, %rTmp1) // agrk %rTmp1, %rTmp1, %rTmp2 // br %rTmp1 // [jt entries] // // This must be *one* instruction in the vcode because // we cannot allow regalloc to insert any spills/fills // in the middle of the sequence; otherwise, the LARL's // PC-rel offset to the jumptable would be incorrect. // (The alternative is to introduce a relocation pass // for inlined jumptables, which is much worse, IMHO.) let default_target = BranchTarget::Label(targets[0]); let jt_targets: Vec<BranchTarget> = targets .iter() .skip(1) .map(|bix| BranchTarget::Label(*bix)) .collect(); let targets_for_term: Vec<MachLabel> = targets.to_vec(); ctx.emit(Inst::JTSequence { ridx, rtmp1, rtmp2, info: Box::new(JTSequenceInfo { default_target, targets: jt_targets, targets_for_term, }), }); } _ => panic!("Unknown branch type!"), } } Ok(()) } //============================================================================= // Lowering-backend trait implementation.
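// These trait hooks simply dispatch to `lower_insn_to_regs` (passing the
// shared and ISA-specific flags) and to `lower_branch` for branch groups.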
impl LowerBackend for S390xBackend { type MInst = Inst; fn lower<C: LowerCtx<I = Inst>>(&self, ctx: &mut C, ir_inst: IRInst) -> CodegenResult<()> { lower_insn_to_regs(ctx, ir_inst, &self.flags, &self.isa_flags) } fn lower_branch_group<C: LowerCtx<I = Inst>>( &self, ctx: &mut C, branches: &[IRInst], targets: &[MachLabel], ) -> CodegenResult<()> { lower_branch(ctx, branches, targets) } }