diff --git a/cranelift/codegen/src/isa/aarch64/inst.isle b/cranelift/codegen/src/isa/aarch64/inst.isle
index 33ef41c9f2..8272067c57 100644
--- a/cranelift/codegen/src/isa/aarch64/inst.isle
+++ b/cranelift/codegen/src/isa/aarch64/inst.isle
@@ -1607,6 +1607,12 @@
 ;; Extractor helpers for various immediate constants ;;;;;;;;;;;;;;;;;;;;;;;;;;
 
+(decl pure partial move_wide_const_from_u64 (Type u64) MoveWideConst)
+(extern constructor move_wide_const_from_u64 move_wide_const_from_u64)
+
+(decl pure partial move_wide_const_from_inverted_u64 (Type u64) MoveWideConst)
+(extern constructor move_wide_const_from_inverted_u64 move_wide_const_from_inverted_u64)
+
 (decl pure partial imm_logic_from_u64 (Type u64) ImmLogic)
 (extern constructor imm_logic_from_u64 imm_logic_from_u64)
 
@@ -2747,6 +2753,15 @@
 ;; such as `I8` are either sign- or zero-extended.
 (decl imm (Type ImmExtend u64) Reg)
 
+;; Move wide immediate instructions; to simplify, we only match when we
+;; are zero-extending the value.
+(rule 3 (imm (integral_ty ty) (ImmExtend.Zero) k)
+      (if-let n (move_wide_const_from_u64 ty k))
+      (movz n (operand_size ty)))
+(rule 2 (imm (integral_ty (ty_32_or_64 ty)) (ImmExtend.Zero) k)
+      (if-let n (move_wide_const_from_inverted_u64 ty k))
+      (movn n (operand_size ty)))
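+;; (For example, 0x1234_0000_0000_0000 is matched by the MOVZ rule above, a
+;; single 16-bit chunk at one of the shifts 0/16/32/48, while
+;; 0xffff_ffff_ffff_1234 is matched by the MOVN rule, since its bitwise
+;; inverse 0xedcb fits MOVZ.)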
+
 ;; Weird logical-instruction immediate in ORI using zero register; to simplify,
 ;; we only match when we are zero-extending the value.
 (rule 1 (imm (integral_ty ty) (ImmExtend.Zero) k)
@@ -2940,6 +2955,11 @@
   (let ((_ Unit (sink_inst x)))
     (amode ty addr offset)))
 
+;; Lower a constant f32.
+(decl constant_f32 (u64) Reg)
+;; TODO: Port lower_constant_f32() to ISLE.
+(extern constructor constant_f32 constant_f32)
+
 ;; Lower a constant f64.
 (decl constant_f64 (u64) Reg)
 ;; TODO: Port lower_constant_f64() to ISLE.
diff --git a/cranelift/codegen/src/isa/aarch64/inst/mod.rs b/cranelift/codegen/src/isa/aarch64/inst/mod.rs
index c9c2f037f9..518de54d21 100644
--- a/cranelift/codegen/src/isa/aarch64/inst/mod.rs
+++ b/cranelift/codegen/src/isa/aarch64/inst/mod.rs
@@ -234,27 +234,6 @@ impl Inst {
         }
     }
 
-    /// Create instructions that load a 128-bit constant.
-    pub fn load_constant128<F: FnMut(Type) -> Writable<Reg>>(
-        to_regs: ValueRegs<Writable<Reg>>,
-        value: u128,
-        mut alloc_tmp: F,
-    ) -> SmallVec<[Inst; 4]> {
-        assert_eq!(to_regs.len(), 2, "Expected to load i128 into two registers");
-
-        let lower = value as u64;
-        let upper = (value >> 64) as u64;
-
-        let lower_reg = to_regs.regs()[0];
-        let upper_reg = to_regs.regs()[1];
-
-        let mut load_ins = Inst::load_constant(lower_reg, lower, &mut alloc_tmp);
-        let load_upper = Inst::load_constant(upper_reg, upper, &mut alloc_tmp);
-
-        load_ins.extend(load_upper.into_iter());
-        load_ins
-    }
-
     /// Create instructions that load a 32-bit floating-point constant.
     pub fn load_fp_constant32<F: FnMut(Type) -> Writable<Reg>>(
         rd: Writable<Reg>,
@@ -1233,24 +1212,6 @@ impl MachInst for Inst {
         }
     }
 
-    fn gen_constant<F: FnMut(Type) -> Writable<Reg>>(
-        to_regs: ValueRegs<Writable<Reg>>,
-        value: u128,
-        ty: Type,
-        mut alloc_tmp: F,
-    ) -> SmallVec<[Inst; 4]> {
-        let to_reg = to_regs.only_reg();
-        match ty {
-            F64 => Inst::load_fp_constant64(to_reg.unwrap(), value as u64, alloc_tmp),
-            F32 => Inst::load_fp_constant32(to_reg.unwrap(), value as u32, alloc_tmp),
-            I8 | I16 | I32 | I64 | R32 | R64 => {
-                Inst::load_constant(to_reg.unwrap(), value as u64, &mut alloc_tmp)
-            }
-            I128 => Inst::load_constant128(to_regs, value, alloc_tmp),
-            _ => panic!("Cannot generate constant for type: {}", ty),
-        }
-    }
-
     fn gen_dummy_use(reg: Reg) -> Inst {
         Inst::DummyUse { reg }
     }
diff --git a/cranelift/codegen/src/isa/aarch64/lower.isle b/cranelift/codegen/src/isa/aarch64/lower.isle
index 8f22ae599d..87215d66a9 100644
--- a/cranelift/codegen/src/isa/aarch64/lower.isle
+++ b/cranelift/codegen/src/isa/aarch64/lower.isle
@@ -24,6 +24,21 @@
 (rule (lower (has_type ty (null)))
       (imm ty (ImmExtend.Zero) 0))
 
+;;;; Rules for `f32const` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+(rule (lower (f32const (u64_from_ieee32 n)))
+      (constant_f32 n))
+
+;;;; Rules for `f64const` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+(rule (lower (f64const (u64_from_ieee64 n)))
+      (constant_f64 n))
+
+;;;; Rules for `nop` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+(rule (lower (nop))
+      (invalid_reg))
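+;; (`nop` lowers to no machine instructions at all; since it produces no
+;; value, an invalid-register placeholder stands in for its result.)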
+
 ;;;; Rules for `iadd` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 
 ;; `i64` and smaller
diff --git a/cranelift/codegen/src/isa/aarch64/lower.rs b/cranelift/codegen/src/isa/aarch64/lower.rs
index 9b17d95b07..55f40ff745 100644
--- a/cranelift/codegen/src/isa/aarch64/lower.rs
+++ b/cranelift/codegen/src/isa/aarch64/lower.rs
@@ -7,7 +7,6 @@
 //!
 //! - Floating-point immediates (FIMM instruction).
 
-use super::lower_inst;
 use crate::ir::condcodes::{FloatCC, IntCC};
 use crate::ir::types::*;
 use crate::ir::Inst as IRInst;
@@ -16,6 +15,7 @@ use crate::isa::aarch64::inst::*;
 use crate::isa::aarch64::AArch64Backend;
 use crate::machinst::lower::*;
 use crate::machinst::{Reg, Writable};
+use crate::CodegenError;
 use crate::CodegenResult;
 use crate::{machinst::*, trace};
 use smallvec::{smallvec, SmallVec};
@@ -43,27 +43,6 @@ impl NarrowValueMode {
     }
 }
 
-/// Emits instruction(s) to generate the given constant value into newly-allocated
-/// temporary registers, returning these registers.
-fn generate_constant(ctx: &mut Lower<Inst>, ty: Type, c: u128) -> ValueRegs<Reg> {
-    let from_bits = ty_bits(ty);
-    let masked = if from_bits < 128 {
-        c & ((1u128 << from_bits) - 1)
-    } else {
-        c
-    };
-
-    let cst_copy = ctx.alloc_tmp(ty);
-    for inst in Inst::gen_constant(cst_copy, masked, ty, |ty| {
-        ctx.alloc_tmp(ty).only_reg().unwrap()
-    })
-    .into_iter()
-    {
-        ctx.emit(inst);
-    }
-    non_writable_value_regs(cst_copy)
-}
-
 /// Extends a register according to `narrow_mode`.
 /// If extended, the value is always extended to 64 bits, for simplicity.
 fn extend_reg(
@@ -112,7 +91,20 @@ fn lower_value_to_regs(ctx: &mut Lower<Inst>, value: Value) -> (ValueRegs<Reg>,
     let in_regs = if let Some(c) = inputs.constant {
         // Generate constants fresh at each use to minimize long-range register pressure.
-        generate_constant(ctx, ty, c as u128)
+        let from_bits = ty_bits(ty);
+        let c = if from_bits < 64 {
+            c & ((1u64 << from_bits) - 1)
+        } else {
+            c
+        };
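+        // Only integer- and reference-typed constants reach this path (it is
+        // used to materialize addresses); float and vector constants are now
+        // handled entirely by the ISLE lowering rules.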
+        match ty {
+            I8 | I16 | I32 | I64 | R32 | R64 => {
+                let cst_copy = ctx.alloc_tmp(ty);
+                lower_constant_u64(ctx, cst_copy.only_reg().unwrap(), c);
+                non_writable_value_regs(cst_copy)
+            }
+            _ => unreachable!(), // Only used for addresses.
+        }
     } else {
         ctx.put_value_in_regs(value)
     };
@@ -754,8 +746,227 @@ pub(crate) fn maybe_value_multi(
 impl LowerBackend for AArch64Backend {
     type MInst = Inst;
 
-    fn lower(&self, ctx: &mut Lower<Inst>, ir_inst: IRInst) -> CodegenResult<()> {
-        lower_inst::lower_insn_to_regs(ctx, ir_inst, &self.triple, &self.flags, &self.isa_flags)
+    fn lower(&self, ctx: &mut Lower<Inst>, ir_inst: IRInst) -> CodegenResult<InstOutput> {
+        if let Some(temp_regs) = super::lower::isle::lower(ctx, self, ir_inst) {
+            return Ok(temp_regs);
+        }
+
+        let op = ctx.data(ir_inst).opcode();
+        let ty = if ctx.num_outputs(ir_inst) > 0 {
+            Some(ctx.output_ty(ir_inst, 0))
+        } else {
+            None
+        };
+
+        match op {
+            Opcode::Iconst
+            | Opcode::Null
+            | Opcode::F32const
+            | Opcode::F64const
+            | Opcode::GetFramePointer
+            | Opcode::GetStackPointer
+            | Opcode::GetReturnAddress
+            | Opcode::Iadd
+            | Opcode::Isub
+            | Opcode::UaddSat
+            | Opcode::SaddSat
+            | Opcode::UsubSat
+            | Opcode::SsubSat
+            | Opcode::Ineg
+            | Opcode::Imul
+            | Opcode::Umulhi
+            | Opcode::Smulhi
+            | Opcode::Udiv
+            | Opcode::Sdiv
+            | Opcode::Urem
+            | Opcode::Srem
+            | Opcode::Uextend
+            | Opcode::Sextend
+            | Opcode::Bnot
+            | Opcode::Band
+            | Opcode::Bor
+            | Opcode::Bxor
+            | Opcode::BandNot
+            | Opcode::BorNot
+            | Opcode::BxorNot
+            | Opcode::Ishl
+            | Opcode::Ushr
+            | Opcode::Sshr
+            | Opcode::Rotr
+            | Opcode::Rotl
+            | Opcode::Bitrev
+            | Opcode::Clz
+            | Opcode::Cls
+            | Opcode::Ctz
+            | Opcode::Bswap
+            | Opcode::Popcnt
+            | Opcode::Load
+            | Opcode::Uload8
+            | Opcode::Sload8
+            | Opcode::Uload16
+            | Opcode::Sload16
+            | Opcode::Uload32
+            | Opcode::Sload32
+            | Opcode::Sload8x8
+            | Opcode::Uload8x8
+            | Opcode::Sload16x4
+            | Opcode::Uload16x4
+            | Opcode::Sload32x2
+            | Opcode::Uload32x2
+            | Opcode::Store
+            | Opcode::Istore8
+            | Opcode::Istore16
+            | Opcode::Istore32
+            | Opcode::StackAddr
+            | Opcode::DynamicStackAddr
+            | Opcode::AtomicRmw
+            | Opcode::AtomicCas
+            | Opcode::AtomicLoad
+            | Opcode::AtomicStore
+            | Opcode::Fence
+            | Opcode::Nop
+            | Opcode::Select
+            | Opcode::SelectSpectreGuard
+            | Opcode::Bitselect
+            | Opcode::Vselect
+            | Opcode::IsNull
+            | Opcode::IsInvalid
+            | Opcode::Ireduce
+            | Opcode::Bmask
+            | Opcode::Bitcast
+            | Opcode::Return
+            | Opcode::Icmp
+            | Opcode::Fcmp
+            | Opcode::Debugtrap
+            | Opcode::Trap
+            | Opcode::ResumableTrap
+            | Opcode::FuncAddr
+            | Opcode::SymbolValue
+            | Opcode::Call
+            | Opcode::CallIndirect
+            | Opcode::GetPinnedReg
+            | Opcode::SetPinnedReg
+            | Opcode::Vconst
+            | Opcode::Extractlane
+            | Opcode::Insertlane
+            | Opcode::Splat
+            | Opcode::ScalarToVector
+            | Opcode::VallTrue
+            | Opcode::VanyTrue
+            | Opcode::VhighBits
+            | Opcode::Shuffle
+            | Opcode::Swizzle
+            | Opcode::Isplit
+            | Opcode::Iconcat
+            | Opcode::Smax
+            | Opcode::Umax
+            | Opcode::Umin
+            | Opcode::Smin
+            | Opcode::IaddPairwise
+            | Opcode::WideningPairwiseDotProductS
+            | Opcode::Fadd
+            | Opcode::Fsub
+            | Opcode::Fmul
+            | Opcode::Fdiv
+            | Opcode::Fmin
+            | Opcode::Fmax
+            | Opcode::FminPseudo
+            | Opcode::FmaxPseudo
+            | Opcode::Sqrt
+            | Opcode::Fneg
+            | Opcode::Fabs
+            | Opcode::Fpromote
+            | Opcode::Fdemote
+            | Opcode::Ceil
+            | Opcode::Floor
+            | Opcode::Trunc
+            | Opcode::Nearest
+            | Opcode::Fma
+            | Opcode::Fcopysign
+            | Opcode::FcvtToUint
+            | Opcode::FcvtToSint
+            | Opcode::FcvtFromUint
+            | Opcode::FcvtFromSint
+            | Opcode::FcvtToUintSat
+            | Opcode::FcvtToSintSat
+            | Opcode::UaddOverflowTrap
+            | Opcode::IaddCout
+            | Opcode::Iabs
+            | Opcode::AvgRound
+            | Opcode::Snarrow
+            | Opcode::Unarrow
+            | Opcode::Uunarrow
+            | Opcode::SwidenLow
+            | Opcode::SwidenHigh
+            | Opcode::UwidenLow
+            | Opcode::UwidenHigh
+            | Opcode::TlsValue
+            | Opcode::SqmulRoundSat
+            | Opcode::FcvtLowFromSint
+            | Opcode::FvpromoteLow
+            | Opcode::Fvdemote
+            | Opcode::ExtractVector => {
+                unreachable!(
+                    "implemented in ISLE: inst = `{}`, type = `{:?}`",
+                    ctx.dfg().display_inst(ir_inst),
+                    ty
+                );
+            }
+
+            Opcode::StackLoad
+            | Opcode::StackStore
+            | Opcode::DynamicStackStore
+            | Opcode::DynamicStackLoad => {
+                panic!("Direct stack memory access not supported; should not be used by Wasm");
+            }
+            Opcode::HeapLoad | Opcode::HeapStore | Opcode::HeapAddr => {
+                panic!("heap access instructions should have been removed by legalization!");
+            }
+            Opcode::TableAddr => {
+                panic!("table_addr should have been removed by legalization!");
+            }
+            Opcode::Trapz | Opcode::Trapnz | Opcode::ResumableTrapnz => {
+                panic!(
+                    "trapz / trapnz / resumable_trapnz should have been removed by legalization!"
+                );
+            }
+            Opcode::GlobalValue => {
+                panic!("global_value should have been removed by legalization!");
+            }
+            Opcode::Jump | Opcode::Brz | Opcode::Brnz | Opcode::BrTable => {
+                panic!("Branch opcode reached non-branch lowering logic!");
+            }
+            Opcode::IaddImm
+            | Opcode::ImulImm
+            | Opcode::UdivImm
+            | Opcode::SdivImm
+            | Opcode::UremImm
+            | Opcode::SremImm
+            | Opcode::IrsubImm
+            | Opcode::IaddCin
+            | Opcode::IaddCarry
+            | Opcode::IsubBin
+            | Opcode::IsubBout
+            | Opcode::IsubBorrow
+            | Opcode::BandImm
+            | Opcode::BorImm
+            | Opcode::BxorImm
+            | Opcode::RotlImm
+            | Opcode::RotrImm
+            | Opcode::IshlImm
+            | Opcode::UshrImm
+            | Opcode::SshrImm
+            | Opcode::IcmpImm => {
+                panic!("ALU+imm and ALU+carry ops should not appear here!");
+            }
+
+            Opcode::Vconcat | Opcode::Vsplit => {
+                return Err(CodegenError::Unsupported(format!(
+                    "Unimplemented lowering: {}",
+                    op
+                )));
+            }
+        }
     }
 
     fn lower_branch_group(
@@ -776,14 +987,8 @@
             assert!(op1 == Opcode::Jump);
         }
 
-        if let Ok(()) = super::lower::isle::lower_branch(
-            ctx,
-            &self.triple,
-            &self.flags,
-            &self.isa_flags,
-            branches[0],
-            targets,
-        ) {
+        if let Some(temp_regs) = super::lower::isle::lower_branch(ctx, self, branches[0], targets)
+        {
+            assert!(temp_regs.len() == 0);
             return Ok(());
         }
diff --git a/cranelift/codegen/src/isa/aarch64/lower/isle.rs b/cranelift/codegen/src/isa/aarch64/lower/isle.rs
index a9a0f674c3..7efd33610a 100644
--- a/cranelift/codegen/src/isa/aarch64/lower/isle.rs
+++ b/cranelift/codegen/src/isa/aarch64/lower/isle.rs
@@ -18,10 +18,9 @@ use super::{
 use crate::ir::condcodes;
 use crate::isa::aarch64::inst::{FPULeftShiftImm, FPURightShiftImm};
 use crate::isa::aarch64::lower::{lower_address, lower_pair_address, lower_splat_const};
-use crate::isa::aarch64::settings::Flags as IsaFlags;
+use crate::isa::aarch64::AArch64Backend;
 use crate::machinst::valueregs;
 use crate::machinst::{isle::*, InputSourceInst};
-use crate::settings::Flags;
 use crate::{
     binemit::CodeOffset,
     ir::{
@@ -32,7 +31,7 @@ use crate::{
     isa::aarch64::inst::args::{ShiftOp, ShiftOpShiftImm},
     isa::unwind::UnwindInst,
     machinst::{
-        abi::ArgPair, ty_bits, InsnOutput, Lower, MachInst, VCodeConstant, VCodeConstantData,
+        abi::ArgPair, ty_bits, InstOutput, Lower, MachInst, VCodeConstant, VCodeConstantData,
     },
 };
 use crate::{isle_common_prelude_methods, isle_lower_prelude_methods};
@@ -40,7 +39,6 @@ use regalloc2::PReg;
 use std::boxed::Box;
 use std::convert::TryFrom;
 use std::vec::Vec;
-use target_lexicon::Triple;
 
 type BoxCallInfo = Box<CallInfo>;
 type BoxCallIndInfo = Box<CallIndInfo>;
@@ -52,40 +50,25 @@ type VecArgPair = Vec<ArgPair>;
 
 /// The main entry point for lowering with ISLE.
 pub(crate) fn lower(
     lower_ctx: &mut Lower<MInst>,
-    triple: &Triple,
-    flags: &Flags,
-    isa_flags: &IsaFlags,
-    outputs: &[InsnOutput],
+    backend: &AArch64Backend,
     inst: Inst,
-) -> Result<(), ()> {
-    lower_common(
-        lower_ctx,
-        triple,
-        flags,
-        isa_flags,
-        outputs,
-        inst,
-        |cx, insn| generated_code::constructor_lower(cx, insn),
-    )
+) -> Option<InstOutput> {
+    // TODO: reuse the ISLE context across lowerings so we can reuse its
+    // internal heap allocations.
+    let mut isle_ctx = IsleContext { lower_ctx, backend };
+    generated_code::constructor_lower(&mut isle_ctx, inst)
 }
 
 pub(crate) fn lower_branch(
     lower_ctx: &mut Lower<MInst>,
-    triple: &Triple,
-    flags: &Flags,
-    isa_flags: &IsaFlags,
+    backend: &AArch64Backend,
     branch: Inst,
     targets: &[MachLabel],
-) -> Result<(), ()> {
-    lower_common(
-        lower_ctx,
-        triple,
-        flags,
-        isa_flags,
-        &[],
-        branch,
-        |cx, insn| generated_code::constructor_lower_branch(cx, insn, &targets.to_vec()),
-    )
+) -> Option<InstOutput> {
+    // TODO: reuse the ISLE context across lowerings so we can reuse its
+    // internal heap allocations.
+    let mut isle_ctx = IsleContext { lower_ctx, backend };
+    generated_code::constructor_lower_branch(&mut isle_ctx, branch, &targets.to_vec())
 }
 
 pub struct ExtendedValue {
@@ -93,16 +76,16 @@ pub struct ExtendedValue {
     extend: ExtendOp,
 }
 
-impl IsleContext<'_, '_, MInst, Flags, IsaFlags, 6> {
+impl IsleContext<'_, '_, MInst, AArch64Backend> {
     isle_prelude_method_helpers!(AArch64Caller);
 }
 
-impl Context for IsleContext<'_, '_, MInst, Flags, IsaFlags, 6> {
+impl Context for IsleContext<'_, '_, MInst, AArch64Backend> {
     isle_lower_prelude_methods!();
     isle_prelude_caller_methods!(crate::isa::aarch64::abi::AArch64MachineDeps, AArch64Caller);
 
     fn sign_return_address_disabled(&mut self) -> Option<()> {
-        if self.isa_flags.sign_return_address() {
+        if self.backend.isa_flags.sign_return_address() {
             None
         } else {
             Some(())
@@ -110,13 +93,27 @@ impl Context for IsleContext<'_, '_, MInst, Flags, IsaFlags, 6> {
     }
 
     fn use_lse(&mut self, _: Inst) -> Option<()> {
-        if self.isa_flags.has_lse() {
+        if self.backend.isa_flags.has_lse() {
            Some(())
         } else {
            None
         }
     }
 
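+    // The constant is first masked to the width of `ty`, since the bits
+    // above the type's width are don't-care bits here: e.g. for an `I32`,
+    // 0xffff_ffff_0000_1234 is reduced to 0x1234, which MOVZ can encode.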
+    fn move_wide_const_from_u64(&mut self, ty: Type, n: u64) -> Option<MoveWideConst> {
+        let bits = ty.bits();
+        let n = if bits < 64 {
+            n & !(u64::MAX << bits)
+        } else {
+            n
+        };
+        MoveWideConst::maybe_from_u64(n)
+    }
+
+    fn move_wide_const_from_inverted_u64(&mut self, ty: Type, n: u64) -> Option<MoveWideConst> {
+        self.move_wide_const_from_u64(ty, !n)
+    }
+
     fn imm_logic_from_u64(&mut self, ty: Type, n: u64) -> Option<ImmLogic> {
         ImmLogic::maybe_from_u64(n, ty)
     }
@@ -523,6 +520,14 @@ impl Context for IsleContext<'_, '_, MInst, Flags, IsaFlags, 6> {
         lower_pair_address(self.lower_ctx, addr, offset as i32)
     }
 
+    fn constant_f32(&mut self, value: u64) -> Reg {
+        let rd = self.temp_writable_reg(I8X16);
+
+        lower_constant_f32(self.lower_ctx, rd, f32::from_bits(value as u32));
+
+        rd.to_reg()
+    }
+
     fn constant_f64(&mut self, value: u64) -> Reg {
         let rd = self.temp_writable_reg(I8X16);
 
diff --git a/cranelift/codegen/src/isa/aarch64/lower_inst.rs b/cranelift/codegen/src/isa/aarch64/lower_inst.rs
deleted file mode 100644
index 92674f4409..0000000000
--- a/cranelift/codegen/src/isa/aarch64/lower_inst.rs
+++ /dev/null
@@ -1,306 +0,0 @@
-//! Lower a single Cranelift instruction into vcode.
-
-use crate::ir::Inst as IRInst;
-use crate::ir::Opcode;
-use crate::isa::aarch64::inst::*;
-use crate::isa::aarch64::settings as aarch64_settings;
-use crate::machinst::lower::*;
-use crate::machinst::*;
-use crate::settings::Flags;
-use crate::{CodegenError, CodegenResult};
-use target_lexicon::Triple;
-
-/// Actually codegen an instruction's results into registers.
-pub(crate) fn lower_insn_to_regs(
-    ctx: &mut Lower<Inst>,
-    insn: IRInst,
-    triple: &Triple,
-    flags: &Flags,
-    isa_flags: &aarch64_settings::Flags,
-) -> CodegenResult<()> {
-    let op = ctx.data(insn).opcode();
-    let outputs = insn_outputs(ctx, insn);
-    let ty = if outputs.len() > 0 {
-        Some(ctx.output_ty(insn, 0))
-    } else {
-        None
-    };
-
-    if let Ok(()) = super::lower::isle::lower(ctx, triple, flags, isa_flags, &outputs, insn) {
-        return Ok(());
-    }
-
-    let implemented_in_isle = |ctx: &mut Lower<Inst>| -> ! {
-        unreachable!(
-            "implemented in ISLE: inst = `{}`, type = `{:?}`",
-            ctx.dfg().display_inst(insn),
-            ty
-        );
-    };
-
-    match op {
-        Opcode::Iconst | Opcode::Null => implemented_in_isle(ctx),
-
-        Opcode::F32const => {
-            let rd = get_output_reg(ctx, outputs[0]).only_reg().unwrap();
-            let val = ctx.get_constant(insn).unwrap();
-            for inst in
-                Inst::load_fp_constant32(rd, val as u32, |ty| ctx.alloc_tmp(ty).only_reg().unwrap())
-            {
-                ctx.emit(inst);
-            }
-        }
-
-        Opcode::F64const => {
-            let rd = get_output_reg(ctx, outputs[0]).only_reg().unwrap();
-            let val = ctx.get_constant(insn).unwrap();
-            for inst in
-                Inst::load_fp_constant64(rd, val, |ty| ctx.alloc_tmp(ty).only_reg().unwrap())
-            {
-                ctx.emit(inst);
-            }
-        }
-
-        Opcode::GetFramePointer | Opcode::GetStackPointer | Opcode::GetReturnAddress => {
-            implemented_in_isle(ctx)
-        }
-
-        Opcode::Iadd => implemented_in_isle(ctx),
-        Opcode::Isub => implemented_in_isle(ctx),
-        Opcode::UaddSat | Opcode::SaddSat | Opcode::UsubSat | Opcode::SsubSat => {
-            implemented_in_isle(ctx)
-        }
-
-        Opcode::Ineg => implemented_in_isle(ctx),
-
-        Opcode::Imul => implemented_in_isle(ctx),
-
-        Opcode::Umulhi | Opcode::Smulhi => implemented_in_isle(ctx),
-
-        Opcode::Udiv | Opcode::Sdiv | Opcode::Urem | Opcode::Srem => implemented_in_isle(ctx),
-
-        Opcode::Uextend | Opcode::Sextend => implemented_in_isle(ctx),
-
-        Opcode::Bnot => implemented_in_isle(ctx),
-
-        Opcode::Band
-        | Opcode::Bor
-        | Opcode::Bxor
-        | Opcode::BandNot
-        | Opcode::BorNot
-        | Opcode::BxorNot => implemented_in_isle(ctx),
-
-        Opcode::Ishl | Opcode::Ushr | Opcode::Sshr => implemented_in_isle(ctx),
-
-        Opcode::Rotr | Opcode::Rotl => implemented_in_isle(ctx),
-
-        Opcode::Bitrev | Opcode::Clz | Opcode::Cls | Opcode::Ctz => implemented_in_isle(ctx),
-
-        Opcode::Bswap => implemented_in_isle(ctx),
-
-        Opcode::Popcnt => implemented_in_isle(ctx),
-
-        Opcode::Load
-        | Opcode::Uload8
-        | Opcode::Sload8
-        | Opcode::Uload16
-        | Opcode::Sload16
-        | Opcode::Uload32
-        | Opcode::Sload32
-        | Opcode::Sload8x8
-        | Opcode::Uload8x8
-        | Opcode::Sload16x4
-        | Opcode::Uload16x4
-        | Opcode::Sload32x2
-        | Opcode::Uload32x2 => implemented_in_isle(ctx),
-
-        Opcode::Store | Opcode::Istore8 | Opcode::Istore16 | Opcode::Istore32 => {
-            implemented_in_isle(ctx)
-        }
-
-        Opcode::StackAddr => implemented_in_isle(ctx),
-
-        Opcode::DynamicStackAddr => implemented_in_isle(ctx),
-
-        Opcode::AtomicRmw => implemented_in_isle(ctx),
-
-        Opcode::AtomicCas => implemented_in_isle(ctx),
-
-        Opcode::AtomicLoad => implemented_in_isle(ctx),
-
-        Opcode::AtomicStore => implemented_in_isle(ctx),
-
-        Opcode::Fence => implemented_in_isle(ctx),
-
-        Opcode::StackLoad
-        | Opcode::StackStore
-        | Opcode::DynamicStackStore
-        | Opcode::DynamicStackLoad => {
-            panic!("Direct stack memory access not supported; should not be used by Wasm");
-        }
-
-        Opcode::HeapLoad | Opcode::HeapStore | Opcode::HeapAddr => {
-            panic!("heap access instructions should have been removed by legalization!");
-        }
-
-        Opcode::TableAddr => {
-            panic!("table_addr should have been removed by legalization!");
-        }
-
-        Opcode::Nop => {
-            // Nothing.
-        }
-
-        Opcode::Select => implemented_in_isle(ctx),
-
-        Opcode::SelectSpectreGuard => implemented_in_isle(ctx),
-
-        Opcode::Bitselect | Opcode::Vselect => implemented_in_isle(ctx),
-
-        Opcode::IsNull | Opcode::IsInvalid => implemented_in_isle(ctx),
-
-        Opcode::Ireduce => implemented_in_isle(ctx),
-
-        Opcode::Bmask => implemented_in_isle(ctx),
-
-        Opcode::Bitcast => implemented_in_isle(ctx),
-
-        Opcode::Return => implemented_in_isle(ctx),
-
-        Opcode::Icmp => implemented_in_isle(ctx),
-
-        Opcode::Fcmp => implemented_in_isle(ctx),
-
-        Opcode::Debugtrap => implemented_in_isle(ctx),
-
-        Opcode::Trap | Opcode::ResumableTrap => implemented_in_isle(ctx),
-
-        Opcode::Trapz | Opcode::Trapnz | Opcode::ResumableTrapnz => {
-            panic!("trapz / trapnz / resumable_trapnz should have been removed by legalization!");
-        }
-
-        Opcode::FuncAddr => implemented_in_isle(ctx),
-
-        Opcode::GlobalValue => {
-            panic!("global_value should have been removed by legalization!");
-        }
-
-        Opcode::SymbolValue => implemented_in_isle(ctx),
-
-        Opcode::Call | Opcode::CallIndirect => implemented_in_isle(ctx),
-
-        Opcode::GetPinnedReg | Opcode::SetPinnedReg => implemented_in_isle(ctx),
-
-        Opcode::Jump | Opcode::Brz | Opcode::Brnz | Opcode::BrTable => {
-            panic!("Branch opcode reached non-branch lowering logic!");
-        }
-
-        Opcode::Vconst => implemented_in_isle(ctx),
-
-        Opcode::Extractlane => implemented_in_isle(ctx),
-
-        Opcode::Insertlane => implemented_in_isle(ctx),
-
-        Opcode::Splat => implemented_in_isle(ctx),
-
-        Opcode::ScalarToVector => implemented_in_isle(ctx),
-
-        Opcode::VallTrue | Opcode::VanyTrue => implemented_in_isle(ctx),
-
-        Opcode::VhighBits => implemented_in_isle(ctx),
-
-        Opcode::Shuffle => implemented_in_isle(ctx),
-
-        Opcode::Swizzle => implemented_in_isle(ctx),
-
-        Opcode::Isplit => implemented_in_isle(ctx),
-
-        Opcode::Iconcat => implemented_in_isle(ctx),
-
-        Opcode::Smax | Opcode::Umax | Opcode::Umin | Opcode::Smin => implemented_in_isle(ctx),
-
-        Opcode::IaddPairwise => implemented_in_isle(ctx),
-
-        Opcode::WideningPairwiseDotProductS => implemented_in_isle(ctx),
-
-        Opcode::Fadd | Opcode::Fsub | Opcode::Fmul | Opcode::Fdiv | Opcode::Fmin | Opcode::Fmax => {
-            implemented_in_isle(ctx)
-        }
-
-        Opcode::FminPseudo | Opcode::FmaxPseudo => implemented_in_isle(ctx),
-
-        Opcode::Sqrt | Opcode::Fneg | Opcode::Fabs | Opcode::Fpromote | Opcode::Fdemote => {
-            implemented_in_isle(ctx)
-        }
-
-        Opcode::Ceil | Opcode::Floor | Opcode::Trunc | Opcode::Nearest => implemented_in_isle(ctx),
-
-        Opcode::Fma => implemented_in_isle(ctx),
-
-        Opcode::Fcopysign => implemented_in_isle(ctx),
-
-        Opcode::FcvtToUint | Opcode::FcvtToSint => implemented_in_isle(ctx),
-
-        Opcode::FcvtFromUint | Opcode::FcvtFromSint => implemented_in_isle(ctx),
-
-        Opcode::FcvtToUintSat | Opcode::FcvtToSintSat => implemented_in_isle(ctx),
-
-        Opcode::UaddOverflowTrap => implemented_in_isle(ctx),
-
-        Opcode::IaddCout => implemented_in_isle(ctx),
-
-        Opcode::IaddImm
-        | Opcode::ImulImm
-        | Opcode::UdivImm
-        | Opcode::SdivImm
-        | Opcode::UremImm
-        | Opcode::SremImm
-        | Opcode::IrsubImm
-        | Opcode::IaddCin
-        | Opcode::IaddCarry
-        | Opcode::IsubBin
-        | Opcode::IsubBout
-        | Opcode::IsubBorrow
-        | Opcode::BandImm
-        | Opcode::BorImm
-        | Opcode::BxorImm
-        | Opcode::RotlImm
-        | Opcode::RotrImm
-        | Opcode::IshlImm
-        | Opcode::UshrImm
-        | Opcode::SshrImm
-        | Opcode::IcmpImm => {
-            panic!("ALU+imm and ALU+carry ops should not appear here!");
-        }
-
-        Opcode::Iabs => implemented_in_isle(ctx),
-        Opcode::AvgRound => implemented_in_isle(ctx),
-
-        Opcode::Snarrow | Opcode::Unarrow | Opcode::Uunarrow => implemented_in_isle(ctx),
-
-        Opcode::SwidenLow | Opcode::SwidenHigh | Opcode::UwidenLow | Opcode::UwidenHigh => {
-            implemented_in_isle(ctx)
-        }
-
-        Opcode::TlsValue => implemented_in_isle(ctx),
-
-        Opcode::SqmulRoundSat => implemented_in_isle(ctx),
-
-        Opcode::FcvtLowFromSint => implemented_in_isle(ctx),
-
-        Opcode::FvpromoteLow => implemented_in_isle(ctx),
-
-        Opcode::Fvdemote => implemented_in_isle(ctx),
-
-        Opcode::ExtractVector => implemented_in_isle(ctx),
-
-        Opcode::Vconcat | Opcode::Vsplit => {
-            return Err(CodegenError::Unsupported(format!(
-                "Unimplemented lowering: {}",
-                op
-            )));
-        }
-    }
-
-    Ok(())
-}
diff --git a/cranelift/codegen/src/isa/aarch64/mod.rs b/cranelift/codegen/src/isa/aarch64/mod.rs
index 72459c89e9..a8a94c23bb 100644
--- a/cranelift/codegen/src/isa/aarch64/mod.rs
+++ b/cranelift/codegen/src/isa/aarch64/mod.rs
@@ -21,7 +21,6 @@ use target_lexicon::{Aarch64Architecture, Architecture, OperatingSystem, Triple}
 mod abi;
 pub(crate) mod inst;
 mod lower;
-mod lower_inst;
 mod settings;
 
 use inst::create_reg_env;
@@ -267,11 +266,11 @@ mod test {
     // on it to update:
     // > aarch64-linux-gnu-objdump -b binary -D -m aarch64
     //
-    // 0:   d2824682    mov x2, #0x1234     // #4660
+    // 0:   52824682    mov w2, #0x1234     // #4660
     // 4:   0b020000    add w0, w0, w2
     // 8:   d65f03c0    ret
 
-    let golden = vec![130, 70, 130, 210, 0, 0, 2, 11, 192, 3, 95, 214];
+    let golden = vec![130, 70, 130, 82, 0, 0, 2, 11, 192, 3, 95, 214];
 
     assert_eq!(code, &golden[..]);
 }
@@ -325,24 +324,24 @@
     // on it to update:
     // > aarch64-linux-gnu-objdump -b binary -D -m aarch64
     //
-    // 0:   d2824689    mov x9, #0x1234     // #4660
+    // 0:   52824689    mov w9, #0x1234     // #4660
     // 4:   0b09000b    add w11, w0, w9
     // 8:   2a0b03ea    mov w10, w11
     // c:   b50000aa    cbnz x10, 0x20
-    // 10:  d282468c    mov x12, #0x1234    // #4660
+    // 10:  5282468c    mov w12, #0x1234    // #4660
     // 14:  0b0c016e    add w14, w11, w12
     // 18:  2a0e03ed    mov w13, w14
     // 1c:  b5ffffad    cbnz x13, 0x10
     // 20:  2a0b03e0    mov w0, w11
     // 24:  b5ffff60    cbnz x0, 0x10
-    // 28:  d2824681    mov x1, #0x1234     // #4660
+    // 28:  52824681    mov w1, #0x1234     // #4660
     // 2c:  4b010160    sub w0, w11, w1
     // 30:  d65f03c0    ret
 
     let golden = vec![
-        137, 70, 130, 210, 11, 0, 9, 11, 234, 3, 11, 42, 170, 0, 0, 181, 140, 70, 130, 210,
-        110, 1, 12, 11, 237, 3, 14, 42, 173, 255, 255, 181, 224, 3, 11, 42, 96, 255, 255, 181,
-        129, 70, 130, 210, 96, 1, 1, 75, 192, 3, 95, 214,
+        137, 70, 130, 82, 11, 0, 9, 11, 234, 3, 11, 42, 170, 0, 0, 181, 140, 70, 130, 82, 110,
+        1, 12, 11, 237, 3, 14, 42, 173, 255, 255, 181, 224, 3, 11, 42, 96, 255, 255, 181, 129,
+        70, 130, 82, 96, 1, 1, 75, 192, 3, 95, 214,
     ];
 
     assert_eq!(code, &golden[..]);
@@ -412,18 +411,17 @@
     // 1c:  d61f00e0    br x7
     // 20:  00000010    udf #16
     // 24:  00000018    udf #24
-    // 28:  d2800060    mov x0, #0x3    // #3
+    // 28:  52800060    mov w0, #0x3    // #3
     // 2c:  d65f03c0    ret
-    // 30:  d2800020    mov x0, #0x1    // #1
+    // 30:  52800020    mov w0, #0x1    // #1
     // 34:  d65f03c0    ret
-    // 38:  d2800040    mov x0, #0x2    // #2
+    // 38:  52800040    mov w0, #0x2    // #2
     // 3c:  d65f03c0    ret
 
     let golden = vec![
         31, 8, 0, 113, 34, 1, 0, 84, 232, 35, 128, 154, 159, 34, 3, 213, 135, 0, 0, 16, 232,
         88, 168, 184, 231, 0, 8, 139, 224, 0, 31, 214, 16, 0, 0, 0, 24, 0, 0, 0, 96, 0, 128,
-        210, 192, 3, 95, 214, 32, 0, 128, 210, 192, 3, 95,
-        214,
+        82, 192, 3, 95, 214, 32, 0, 128, 82, 192, 3, 95, 214,
     ];
 
     assert_eq!(code, &golden[..]);
diff --git a/cranelift/codegen/src/isa/riscv64/inst/mod.rs b/cranelift/codegen/src/isa/riscv64/inst/mod.rs
index ac1cc08e7b..5022e8d31b 100644
--- a/cranelift/codegen/src/isa/riscv64/inst/mod.rs
+++ b/cranelift/codegen/src/isa/riscv64/inst/mod.rs
@@ -720,44 +720,6 @@ impl MachInst for Inst {
         x
     }
 
-    fn gen_constant<F: FnMut(Type) -> Writable<Reg>>(
-        to_regs: ValueRegs<Writable<Reg>>,
-        value: u128,
-        ty: Type,
-        mut alloc_tmp: F,
-    ) -> SmallVec<[Inst; 4]> {
-        if (ty.bits() <= 64 && ty.is_int()) || ty == R32 || ty == R64 {
-            return Inst::load_constant_u64(
-                to_regs.only_reg().unwrap(),
-                value as u64,
-                &mut alloc_tmp,
-            );
-        };
-        match ty {
-            F32 => {
-                Inst::load_fp_constant32(to_regs.only_reg().unwrap(), value as u32, &mut alloc_tmp)
-            }
-            F64 => {
-                Inst::load_fp_constant64(to_regs.only_reg().unwrap(), value as u64, &mut alloc_tmp)
-            }
-            I128 => {
-                let mut insts = SmallInstVec::new();
-                insts.extend(Inst::load_constant_u64(
-                    to_regs.regs()[0],
-                    (value >> 64) as u64,
-                    &mut alloc_tmp,
-                ));
-                insts.extend(Inst::load_constant_u64(
-                    to_regs.regs()[1],
-                    value as u64,
-                    &mut alloc_tmp,
-                ));
-                return insts;
-            }
-            _ => unreachable!("vector type not implemented now."),
-        }
-    }
-
     fn gen_nop(preferred_size: usize) -> Inst {
         if preferred_size == 0 {
             return Inst::Nop0;
diff --git a/cranelift/codegen/src/isa/riscv64/lower.isle b/cranelift/codegen/src/isa/riscv64/lower.isle
index 567f7e7c34..219f48e902 100644
--- a/cranelift/codegen/src/isa/riscv64/lower.isle
+++ b/cranelift/codegen/src/isa/riscv64/lower.isle
@@ -9,6 +9,16 @@
 (rule (lower (has_type ty (iconst (u64_from_imm64 n))))
   (imm ty n))
 
+;;;; Rules for `f32const` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+(rule (lower (f32const (u64_from_ieee32 n)))
+  (imm $F32 n))
+
+;;;; Rules for `f64const` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+(rule (lower (f64const (u64_from_ieee64 n)))
+  (imm $F64 n))
+
 ;;;; Rules for `null` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 
 (rule (lower (has_type ty (null)))
diff --git a/cranelift/codegen/src/isa/riscv64/lower.rs b/cranelift/codegen/src/isa/riscv64/lower.rs
index 840071c860..7e93958125 100644
--- a/cranelift/codegen/src/isa/riscv64/lower.rs
+++ b/cranelift/codegen/src/isa/riscv64/lower.rs
@@ -1,5 +1,4 @@
 //! Lowering rules for Riscv64.
-use super::lower_inst;
 use crate::ir::Inst as IRInst;
 use crate::isa::riscv64::inst::*;
 use crate::isa::riscv64::Riscv64Backend;
@@ -14,8 +13,22 @@ pub mod isle;
 impl LowerBackend for Riscv64Backend {
     type MInst = Inst;
 
-    fn lower(&self, ctx: &mut Lower<Inst>, ir_inst: IRInst) -> CodegenResult<()> {
-        lower_inst::lower_insn_to_regs(ctx, ir_inst, &self.triple, &self.flags, &self.isa_flags)
+    fn lower(&self, ctx: &mut Lower<Inst>, ir_inst: IRInst) -> CodegenResult<InstOutput> {
+        if let Some(temp_regs) = super::lower::isle::lower(ctx, self, ir_inst) {
+            return Ok(temp_regs);
+        }
+
+        let ty = if ctx.num_outputs(ir_inst) > 0 {
+            Some(ctx.output_ty(ir_inst, 0))
+        } else {
+            None
+        };
+
+        unreachable!(
+            "not implemented in ISLE: inst = `{}`, type = `{:?}`",
+            ctx.dfg().display_inst(ir_inst),
+            ty
+        );
     }
 
     fn lower_branch_group(
@@ -38,14 +51,8 @@
         // Lower the first branch in ISLE. This will automatically handle
         // the second branch (if any) by emitting a two-way conditional branch.
-        if let Ok(()) = super::lower::isle::lower_branch(
-            ctx,
-            &self.triple,
-            &self.flags,
-            &self.isa_flags,
-            branches[0],
-            targets,
-        ) {
+        if let Some(temp_regs) = super::lower::isle::lower_branch(ctx, self, branches[0], targets)
+        {
+            assert!(temp_regs.len() == 0);
             return Ok(());
         }
         unreachable!(
diff --git a/cranelift/codegen/src/isa/riscv64/lower/isle.rs b/cranelift/codegen/src/isa/riscv64/lower/isle.rs
index caee8fed59..8b8eb1e11c 100644
--- a/cranelift/codegen/src/isa/riscv64/lower/isle.rs
+++ b/cranelift/codegen/src/isa/riscv64/lower/isle.rs
@@ -8,25 +8,23 @@ use generated_code::{Context, MInst};
 // Types that the generated ISLE code uses via `use super::*`.
 use super::{writable_zero_reg, zero_reg};
 use crate::isa::riscv64::abi::Riscv64ABICaller;
-use crate::isa::riscv64::settings::Flags as IsaFlags;
+use crate::isa::riscv64::Riscv64Backend;
 use crate::machinst::Reg;
 use crate::machinst::{isle::*, MachInst, SmallInstVec};
 use crate::machinst::{VCodeConstant, VCodeConstantData};
-use crate::settings::Flags;
 use crate::{
     ir::{
         immediates::*, types::*, AtomicRmwOp, ExternalName, Inst, InstructionData, MemFlags,
         StackSlot, TrapCode, Value, ValueList,
     },
     isa::riscv64::inst::*,
-    machinst::{ArgPair, InsnOutput, Lower},
+    machinst::{ArgPair, InstOutput, Lower},
 };
 use crate::{isle_common_prelude_methods, isle_lower_prelude_methods};
 use regalloc2::PReg;
 use std::boxed::Box;
 use std::convert::TryFrom;
 use std::vec::Vec;
-use target_lexicon::Triple;
 
 type BoxCallInfo = Box<CallInfo>;
 type BoxCallIndInfo = Box<CallIndInfo>;
@@ -38,28 +36,20 @@ use crate::machinst::valueregs;
 
 /// The main entry point for lowering with ISLE.
 pub(crate) fn lower(
     lower_ctx: &mut Lower<MInst>,
-    flags: &Flags,
-    triple: &Triple,
-    isa_flags: &IsaFlags,
-    outputs: &[InsnOutput],
+    backend: &Riscv64Backend,
     inst: Inst,
-) -> Result<(), ()> {
-    lower_common(
-        lower_ctx,
-        triple,
-        flags,
-        isa_flags,
-        outputs,
-        inst,
-        |cx, insn| generated_code::constructor_lower(cx, insn),
-    )
+) -> Option<InstOutput> {
+    // TODO: reuse the ISLE context across lowerings so we can reuse its
+    // internal heap allocations.
+    let mut isle_ctx = IsleContext { lower_ctx, backend };
+    generated_code::constructor_lower(&mut isle_ctx, inst)
 }
 
-impl IsleContext<'_, '_, MInst, Flags, IsaFlags, 6> {
+impl IsleContext<'_, '_, MInst, Riscv64Backend> {
     isle_prelude_method_helpers!(Riscv64ABICaller);
 }
 
-impl generated_code::Context for IsleContext<'_, '_, MInst, Flags, IsaFlags, 6> {
+impl generated_code::Context for IsleContext<'_, '_, MInst, Riscv64Backend> {
     isle_lower_prelude_methods!();
     isle_prelude_caller_methods!(Riscv64MachineDeps, Riscv64ABICaller);
 
@@ -134,7 +124,7 @@ impl generated_code::Context for IsleContext<'_, '_, MInst, Flags, IsaFlags, 6>
         InstOutput::default()
     }
     fn load_ra(&mut self) -> Reg {
-        if self.flags.preserve_frame_pointers() {
+        if self.backend.flags.preserve_frame_pointers() {
             let tmp = self.temp_writable_reg(I64);
             self.emit(&MInst::Load {
                 rd: tmp,
@@ -198,8 +188,13 @@
 
     fn imm(&mut self, ty: Type, val: u64) -> Reg {
         let tmp = self.temp_writable_reg(ty);
-        let insts = &MInst::load_constant_u64(tmp, val, &mut |ty| self.temp_writable_reg(ty));
-        self.emit_list(insts);
+        let alloc_tmp = &mut |ty| self.temp_writable_reg(ty);
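+        // `val` is a raw bit pattern; for F32/F64 it is loaded into a
+        // floating-point register via the fp-constant helpers instead of
+        // being materialized as an integer.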
+        let insts = match ty {
+            F32 => MInst::load_fp_constant32(tmp, val as u32, alloc_tmp),
+            F64 => MInst::load_fp_constant64(tmp, val, alloc_tmp),
+            _ => MInst::load_constant_u64(tmp, val, alloc_tmp),
+        };
+        self.emit_list(&insts);
         tmp.to_reg()
     }
     #[inline]
@@ -309,10 +304,10 @@
     }
 
     fn has_b(&mut self) -> bool {
-        self.isa_flags.has_b()
+        self.backend.isa_flags.has_b()
     }
     fn has_zbkb(&mut self) -> bool {
-        self.isa_flags.has_zbkb()
+        self.backend.isa_flags.has_zbkb()
     }
 
     fn inst_output_get(&mut self, x: InstOutput, index: u8) -> ValueRegs {
@@ -436,7 +431,7 @@
     }
 }
 
-impl IsleContext<'_, '_, MInst, Flags, IsaFlags, 6> {
+impl IsleContext<'_, '_, MInst, Riscv64Backend> {
     #[inline]
     fn emit_list(&mut self, list: &SmallInstVec<MInst>) {
         for i in list {
@@ -448,21 +443,14 @@
 /// The main entry point for branch lowering with ISLE.
 pub(crate) fn lower_branch(
     lower_ctx: &mut Lower<MInst>,
-    triple: &Triple,
-    flags: &Flags,
-    isa_flags: &IsaFlags,
+    backend: &Riscv64Backend,
     branch: Inst,
     targets: &[MachLabel],
-) -> Result<(), ()> {
-    lower_common(
-        lower_ctx,
-        triple,
-        flags,
-        isa_flags,
-        &[],
-        branch,
-        |cx, insn| generated_code::constructor_lower_branch(cx, insn, &targets.to_vec()),
-    )
+) -> Option<InstOutput> {
+    // TODO: reuse the ISLE context across lowerings so we can reuse its
+    // internal heap allocations.
+    let mut isle_ctx = IsleContext { lower_ctx, backend };
+    generated_code::constructor_lower_branch(&mut isle_ctx, branch, &targets.to_vec())
 }
 
 /// construct destination according to ty.
diff --git a/cranelift/codegen/src/isa/riscv64/lower_inst.rs b/cranelift/codegen/src/isa/riscv64/lower_inst.rs
deleted file mode 100644
index db0f4e5a57..0000000000
--- a/cranelift/codegen/src/isa/riscv64/lower_inst.rs
+++ /dev/null
@@ -1,36 +0,0 @@
-//! Lower a single Cranelift instruction into vcode.
-
-use crate::ir::Inst as IRInst;
-
-use crate::isa::riscv64::settings as riscv64_settings;
-use crate::machinst::lower::*;
-use crate::machinst::*;
-use crate::settings::Flags;
-use crate::CodegenResult;
-
-use crate::isa::riscv64::inst::*;
-use target_lexicon::Triple;
-
-/// Actually codegen an instruction's results into registers.
-pub(crate) fn lower_insn_to_regs(
-    ctx: &mut Lower<Inst>,
-    insn: IRInst,
-    triple: &Triple,
-    flags: &Flags,
-    isa_flags: &riscv64_settings::Flags,
-) -> CodegenResult<()> {
-    let outputs = insn_outputs(ctx, insn);
-    let ty = if outputs.len() > 0 {
-        Some(ctx.output_ty(insn, 0))
-    } else {
-        None
-    };
-    if let Ok(()) = super::lower::isle::lower(ctx, flags, triple, isa_flags, &outputs, insn) {
-        return Ok(());
-    }
-    unreachable!(
-        "not implemented in ISLE: inst = `{}`, type = `{:?}`",
-        ctx.dfg().display_inst(insn),
-        ty
-    );
-}
diff --git a/cranelift/codegen/src/isa/riscv64/mod.rs b/cranelift/codegen/src/isa/riscv64/mod.rs
index 8c8fbb80bc..07ca657c74 100644
--- a/cranelift/codegen/src/isa/riscv64/mod.rs
+++ b/cranelift/codegen/src/isa/riscv64/mod.rs
@@ -19,7 +19,6 @@ use target_lexicon::{Architecture, Triple};
 mod abi;
 pub(crate) mod inst;
 mod lower;
-mod lower_inst;
 mod settings;
 #[cfg(feature = "unwind")]
 use crate::isa::unwind::systemv;
diff --git a/cranelift/codegen/src/isa/s390x/inst.isle b/cranelift/codegen/src/isa/s390x/inst.isle
index f218a1610c..9e252a44ef 100644
--- a/cranelift/codegen/src/isa/s390x/inst.isle
+++ b/cranelift/codegen/src/isa/s390x/inst.isle
@@ -2983,49 +2983,49 @@
 (decl imm (Type u64) Reg)
 
 ;; 16-bit (or smaller) result type, any value
-(rule 5 (imm (fits_in_16 ty) n)
+(rule 7 (imm (fits_in_16 ty) n)
   (let ((dst WritableReg (temp_writable_reg ty))
         (_ Unit (emit (MInst.Mov32SImm16 dst (u64_as_i16 n)))))
     dst))
 
 ;; 32-bit result type, value fits in i16
-(rule 4 (imm (gpr32_ty ty) (i16_from_u64 n))
+(rule 6 (imm (gpr32_ty ty) (i16_from_u64 n))
   (let ((dst WritableReg (temp_writable_reg ty))
         (_ Unit (emit (MInst.Mov32SImm16 dst n))))
     dst))
 
 ;; 32-bit result type, any value
-(rule 3 (imm (gpr32_ty ty) n)
+(rule 5 (imm (gpr32_ty ty) n)
   (let ((dst WritableReg (temp_writable_reg ty))
        (_ Unit (emit (MInst.Mov32Imm dst (u64_as_u32 n)))))
    dst))
 
 ;; 64-bit result type, value fits in i16
-(rule 6 (imm (gpr64_ty ty) (i16_from_u64 n))
+(rule 4 (imm (gpr64_ty ty) (i16_from_u64 n))
   (let ((dst WritableReg (temp_writable_reg ty))
         (_ Unit (emit (MInst.Mov64SImm16 dst n))))
     dst))
 
 ;; 64-bit result type, value fits in i32
-(rule 2 (imm (gpr64_ty ty) (i32_from_u64 n))
+(rule 3 (imm (gpr64_ty ty) (i32_from_u64 n))
   (let ((dst WritableReg (temp_writable_reg ty))
         (_ Unit (emit (MInst.Mov64SImm32 dst n))))
     dst))
 
 ;; 64-bit result type, value fits in UImm16Shifted
-(rule 1 (imm (gpr64_ty ty) (uimm16shifted_from_u64 n))
+(rule 2 (imm (gpr64_ty ty) (uimm16shifted_from_u64 n))
   (let ((dst WritableReg (temp_writable_reg ty))
         (_ Unit (emit (MInst.Mov64UImm16Shifted dst n))))
     dst))
 
 ;; 64-bit result type, value fits in UImm32Shifted
-(rule 0 (imm (gpr64_ty ty) (uimm32shifted_from_u64 n))
+(rule 1 (imm (gpr64_ty ty) (uimm32shifted_from_u64 n))
   (let ((dst WritableReg (temp_writable_reg ty))
         (_ Unit (emit (MInst.Mov64UImm32Shifted dst n))))
     dst))
 
 ;; 64-bit result type, value with non-zero low-/high-parts.
-(rule 7 (imm (gpr64_ty ty) (and (u64_nonzero_hipart hi)
+(rule 0 (imm (gpr64_ty ty) (and (u64_nonzero_hipart hi)
                                 (u64_nonzero_lopart lo)))
   (insert_imm ty (imm ty hi) lo))
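+;; (ISLE prefers higher-numbered rules, so this two-instruction fallback,
+;; a load of the high part plus an insert of the low part, now has the
+;; lowest priority, below all of the single-instruction forms above.)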
diff --git a/cranelift/codegen/src/isa/s390x/inst/emit.rs b/cranelift/codegen/src/isa/s390x/inst/emit.rs
index eb7c48bc59..4b5eb35ff4 100644
--- a/cranelift/codegen/src/isa/s390x/inst/emit.rs
+++ b/cranelift/codegen/src/isa/s390x/inst/emit.rs
@@ -104,7 +104,15 @@ pub fn mem_finalize(
             } else {
                 let tmp = writable_spilltmp_reg();
                 assert!(base != tmp.to_reg());
-                insts.extend(Inst::load_constant64(tmp, off as u64, |_| tmp));
+                if let Ok(imm) = i16::try_from(off) {
+                    insts.push(Inst::Mov64SImm16 { rd: tmp, imm });
+                } else if let Ok(imm) = i32::try_from(off) {
+                    insts.push(Inst::Mov64SImm32 { rd: tmp, imm });
+                } else {
+                    // The offset must be smaller than the stack frame size,
+                    // which the ABI code limits to 128 MB.
+                    unreachable!();
+                }
                 MemArg::reg_plus_reg(base, tmp.to_reg(), mem.get_flags())
             }
         }
diff --git a/cranelift/codegen/src/isa/s390x/inst/emit_tests.rs b/cranelift/codegen/src/isa/s390x/inst/emit_tests.rs
index 27b0ba7dfe..e2cb0e0537 100644
--- a/cranelift/codegen/src/isa/s390x/inst/emit_tests.rs
+++ b/cranelift/codegen/src/isa/s390x/inst/emit_tests.rs
@@ -3,6 +3,7 @@ use crate::isa::s390x::inst::*;
 use crate::isa::s390x::settings as s390x_settings;
 use crate::settings;
 use alloc::vec::Vec;
+use smallvec::smallvec;
 
 #[cfg(test)]
 fn simm20_zero() -> SImm20 {
@@ -6366,30 +6367,6 @@ fn test_s390x_binemit() {
         "C0117FFFFFFF41112000",
         "lgfi %r1, 2147483647 ; la %r1, 0(%r1,%r2)",
     ));
-    insns.push((
-        Inst::LoadAddr {
-            rd: writable_gpr(1),
-            mem: MemArg::RegOffset {
-                reg: gpr(2),
-                off: -9223372036854775808,
-                flags: MemFlags::trusted(),
-            },
-        },
-        "A51C800041112000",
-        "llihh %r1, 32768 ; la %r1, 0(%r1,%r2)",
-    ));
-    insns.push((
-        Inst::LoadAddr {
-            rd: writable_gpr(1),
-            mem: MemArg::RegOffset {
-                reg: gpr(2),
-                off: 9223372036854775807,
-                flags: MemFlags::trusted(),
-            },
-        },
-        "C01E7FFFFFFFC019FFFFFFFF41112000",
-        "llihf %r1, 2147483647 ; iilf %r1, 4294967295 ; la %r1, 0(%r1,%r2)",
-    ));
 
     insns.push((
         Inst::Mov64 {
diff --git a/cranelift/codegen/src/isa/s390x/inst/mod.rs b/cranelift/codegen/src/isa/s390x/inst/mod.rs
index 455065806b..722915e9b5 100644
--- a/cranelift/codegen/src/isa/s390x/inst/mod.rs
+++ b/cranelift/codegen/src/isa/s390x/inst/mod.rs
@@ -8,9 +8,8 @@ use crate::machinst::*;
 use crate::{settings, CodegenError, CodegenResult};
 use alloc::boxed::Box;
 use alloc::vec::Vec;
-use core::convert::TryFrom;
 use regalloc2::{PRegSet, VReg};
-use smallvec::{smallvec, SmallVec};
+use smallvec::SmallVec;
 use std::string::{String, ToString};
 pub mod regs;
 pub use self::regs::*;
@@ -334,102 +333,6 @@ impl Inst {
         }
     }
 
-    /// Create an instruction that loads a 64-bit integer constant.
-    pub fn load_constant64<F: FnMut(Type) -> Writable<Reg>>(
-        rd: Writable<Reg>,
-        value: u64,
-        mut alloc_tmp: F,
-    ) -> SmallVec<[Inst; 4]> {
-        if let Ok(imm) = i16::try_from(value as i64) {
-            // 16-bit signed immediate
-            smallvec![Inst::Mov64SImm16 { rd, imm }]
-        } else if let Ok(imm) = i32::try_from(value as i64) {
-            // 32-bit signed immediate
-            smallvec![Inst::Mov64SImm32 { rd, imm }]
-        } else if let Some(imm) = UImm16Shifted::maybe_from_u64(value) {
-            // 16-bit shifted immediate
-            smallvec![Inst::Mov64UImm16Shifted { rd, imm }]
-        } else if let Some(imm) = UImm32Shifted::maybe_from_u64(value) {
-            // 32-bit shifted immediate
-            smallvec![Inst::Mov64UImm32Shifted { rd, imm }]
-        } else {
-            let mut insts = smallvec![];
-            let hi = value & 0xffff_ffff_0000_0000u64;
-            let lo = value & 0x0000_0000_ffff_ffffu64;
-
-            let hi_rd = alloc_tmp(types::I64);
-            if let Some(imm) = UImm16Shifted::maybe_from_u64(hi) {
-                // 16-bit shifted immediate
-                insts.push(Inst::Mov64UImm16Shifted { rd: hi_rd, imm });
-            } else if let Some(imm) = UImm32Shifted::maybe_from_u64(hi) {
-                // 32-bit shifted immediate
-                insts.push(Inst::Mov64UImm32Shifted { rd: hi_rd, imm });
-            } else {
-                unreachable!();
-            }
-
-            if let Some(imm) = UImm16Shifted::maybe_from_u64(lo) {
-                // 16-bit shifted immediate
-                insts.push(Inst::Insert64UImm16Shifted {
-                    rd,
-                    ri: hi_rd.to_reg(),
-                    imm,
-                });
-            } else if let Some(imm) = UImm32Shifted::maybe_from_u64(lo) {
-                // 32-bit shifted immediate
-                insts.push(Inst::Insert64UImm32Shifted {
-                    rd,
-                    ri: hi_rd.to_reg(),
-                    imm,
-                });
-            } else {
-                unreachable!();
-            }
-
-            insts
-        }
-    }
-
-    /// Create an instruction that loads a 32-bit integer constant.
-    pub fn load_constant32(rd: Writable<Reg>, value: u32) -> SmallVec<[Inst; 4]> {
-        if let Ok(imm) = i16::try_from(value as i32) {
-            // 16-bit signed immediate
-            smallvec![Inst::Mov32SImm16 { rd, imm }]
-        } else {
-            // 32-bit full immediate
-            smallvec![Inst::Mov32Imm { rd, imm: value }]
-        }
-    }
-
-    /// Create an instruction that loads a 32-bit floating-point constant.
-    pub fn load_fp_constant32(rd: Writable<Reg>, value: f32) -> Inst {
-        // TODO: use LZER to load 0.0
-        Inst::LoadFpuConst32 {
-            rd,
-            const_data: value.to_bits(),
-        }
-    }
-
-    /// Create an instruction that loads a 64-bit floating-point constant.
-    pub fn load_fp_constant64(rd: Writable<Reg>, value: f64) -> Inst {
-        // TODO: use LZDR to load 0.0
-        Inst::LoadFpuConst64 {
-            rd,
-            const_data: value.to_bits(),
-        }
-    }
-
-    /// Create an instruction that loads a 128-bit floating-point constant.
-    pub fn load_vec_constant(rd: Writable<Reg>, value: u128) -> Inst {
-        // FIXME: This doesn't special-case constants that can be loaded
-        // without a constant pool, like the ISLE lowering does. Ideally,
-        // we should not have to duplicate the logic here.
-        Inst::VecLoadConst {
-            rd,
-            const_data: value,
-        }
-    }
-
     /// Generic constructor for a load (zero-extending where appropriate).
     pub fn gen_load(into_reg: Writable<Reg>, mem: MemArg, ty: Type) -> Inst {
         match ty {
@@ -1180,48 +1083,6 @@ impl MachInst for Inst {
         }
     }
 
-    fn gen_constant<F: FnMut(Type) -> Writable<Reg>>(
-        to_regs: ValueRegs<Writable<Reg>>,
-        value: u128,
-        ty: Type,
-        alloc_tmp: F,
-    ) -> SmallVec<[Inst; 4]> {
-        let to_reg = to_regs
-            .only_reg()
-            .expect("multi-reg values not supported yet");
-        match ty {
-            types::I128 => {
-                let mut ret = SmallVec::new();
-                ret.push(Inst::load_vec_constant(to_reg, value));
-                ret
-            }
-            _ if ty.is_vector() && ty.bits() == 128 => {
-                let mut ret = SmallVec::new();
-                ret.push(Inst::load_vec_constant(to_reg, value));
-                ret
-            }
-            types::F64 => {
-                let mut ret = SmallVec::new();
-                ret.push(Inst::load_fp_constant64(
-                    to_reg,
-                    f64::from_bits(value as u64),
-                ));
-                ret
-            }
-            types::F32 => {
-                let mut ret = SmallVec::new();
-                ret.push(Inst::load_fp_constant32(
-                    to_reg,
-                    f32::from_bits(value as u32),
-                ));
-                ret
-            }
-            types::I64 | types::R64 => Inst::load_constant64(to_reg, value as u64, alloc_tmp),
-            types::I8 | types::I16 | types::I32 => Inst::load_constant32(to_reg, value as u32),
-            _ => unreachable!(),
-        }
-    }
-
     fn gen_nop(preferred_size: usize) -> Inst {
         if preferred_size == 0 {
             Inst::Nop0
diff --git a/cranelift/codegen/src/isa/s390x/lower.rs b/cranelift/codegen/src/isa/s390x/lower.rs
index 7c122713f9..5488791d4f 100644
--- a/cranelift/codegen/src/isa/s390x/lower.rs
+++ b/cranelift/codegen/src/isa/s390x/lower.rs
@@ -4,9 +4,8 @@ use crate::ir::Inst as IRInst;
 use crate::ir::Opcode;
 use crate::isa::s390x::inst::Inst;
 use crate::isa::s390x::S390xBackend;
-use crate::machinst::{InsnOutput, Lower, LowerBackend, MachLabel};
+use crate::machinst::{InstOutput, Lower, LowerBackend, MachLabel};
 use crate::CodegenResult;
-use smallvec::SmallVec;
 
 pub mod isle;
 
@@ -16,31 +15,18 @@ pub mod isle;
 impl LowerBackend for S390xBackend {
     type MInst = Inst;
 
-    fn lower(&self, ctx: &mut Lower<Inst>, ir_inst: IRInst) -> CodegenResult<()> {
+    fn lower(&self, ctx: &mut Lower<Inst>, ir_inst: IRInst) -> CodegenResult<InstOutput> {
+        if let Some(temp_regs) = super::lower::isle::lower(ctx, self, ir_inst) {
+            return Ok(temp_regs);
+        }
+
         let op = ctx.data(ir_inst).opcode();
-        let outputs: SmallVec<[InsnOutput; 2]> = (0..ctx.num_outputs(ir_inst))
-            .map(|i| InsnOutput {
-                insn: ir_inst,
-                output: i,
-            })
-            .collect();
-        let ty = if outputs.len() > 0 {
+        let ty = if ctx.num_outputs(ir_inst) > 0 {
             Some(ctx.output_ty(ir_inst, 0))
         } else {
             None
         };
 
-        if let Ok(()) = super::lower::isle::lower(
-            ctx,
-            &self.triple,
-            &self.flags,
-            &self.isa_flags,
-            &outputs,
-            ir_inst,
-        ) {
-            return Ok(());
-        }
-
         match op {
             Opcode::Nop
             | Opcode::Iconst
@@ -271,14 +257,8 @@
         // Lower the first branch in ISLE. This will automatically handle
         // the second branch (if any) by emitting a two-way conditional branch.
-        if let Ok(()) = super::lower::isle::lower_branch(
-            ctx,
-            &self.triple,
-            &self.flags,
-            &self.isa_flags,
-            branches[0],
-            targets,
-        ) {
+        if let Some(temp_regs) = super::lower::isle::lower_branch(ctx, self, branches[0], targets)
+        {
+            assert!(temp_regs.len() == 0);
             return Ok(());
         }
         unreachable!(
diff --git a/cranelift/codegen/src/isa/s390x/lower/isle.rs b/cranelift/codegen/src/isa/s390x/lower/isle.rs
index 4e495a06e3..6c3d243741 100644
--- a/cranelift/codegen/src/isa/s390x/lower/isle.rs
+++ b/cranelift/codegen/src/isa/s390x/lower/isle.rs
@@ -11,10 +11,9 @@ use crate::isa::s390x::inst::{
     MemArg, MemArgPair, RegPair, SymbolReloc, UImm12, UImm16Shifted, UImm32Shifted,
     WritableRegPair,
 };
-use crate::isa::s390x::settings::Flags as IsaFlags;
+use crate::isa::s390x::S390xBackend;
 use crate::machinst::isle::*;
 use crate::machinst::{MachLabel, Reg};
-use crate::settings::Flags;
 use crate::{
     ir::{
         condcodes::*, immediates::*, types::*, ArgumentPurpose, AtomicRmwOp, Endianness, Inst,
@@ -24,7 +23,7 @@ use crate::{
     isa::CallConv,
     machinst::abi::ABIMachineSpec,
     machinst::{
-        ArgPair, CallArgList, CallArgPair, CallRetList, CallRetPair, InsnOutput, Lower, MachInst,
+        ArgPair, CallArgList, CallArgPair, CallRetList, CallRetPair, InstOutput, Lower, MachInst,
         VCodeConstant, VCodeConstantData,
     },
 };
@@ -35,7 +34,6 @@ use std::boxed::Box;
 use std::cell::Cell;
 use std::convert::TryFrom;
 use std::vec::Vec;
-use target_lexicon::Triple;
 
 /// Information describing a library call to be emitted.
 pub struct LibCallInfo {
@@ -58,44 +56,29 @@ type CallArgListBuilder = Cell<CallArgList>;
 
 /// The main entry point for lowering with ISLE.
 pub(crate) fn lower(
     lower_ctx: &mut Lower<MInst>,
-    triple: &Triple,
-    flags: &Flags,
-    isa_flags: &IsaFlags,
-    outputs: &[InsnOutput],
+    backend: &S390xBackend,
     inst: Inst,
-) -> Result<(), ()> {
-    lower_common(
-        lower_ctx,
-        triple,
-        flags,
-        isa_flags,
-        outputs,
-        inst,
-        |cx, insn| generated_code::constructor_lower(cx, insn),
-    )
+) -> Option<InstOutput> {
+    // TODO: reuse the ISLE context across lowerings so we can reuse its
+    // internal heap allocations.
+    let mut isle_ctx = IsleContext { lower_ctx, backend };
+    generated_code::constructor_lower(&mut isle_ctx, inst)
 }
 
 /// The main entry point for branch lowering with ISLE.
 pub(crate) fn lower_branch(
     lower_ctx: &mut Lower<MInst>,
-    triple: &Triple,
-    flags: &Flags,
-    isa_flags: &IsaFlags,
+    backend: &S390xBackend,
     branch: Inst,
     targets: &[MachLabel],
-) -> Result<(), ()> {
-    lower_common(
-        lower_ctx,
-        triple,
-        flags,
-        isa_flags,
-        &[],
-        branch,
-        |cx, insn| generated_code::constructor_lower_branch(cx, insn, &targets.to_vec()),
-    )
+) -> Option<InstOutput> {
+    // TODO: reuse the ISLE context across lowerings so we can reuse its
+    // internal heap allocations.
+    let mut isle_ctx = IsleContext { lower_ctx, backend };
+    generated_code::constructor_lower_branch(&mut isle_ctx, branch, &targets.to_vec())
 }
 
-impl generated_code::Context for IsleContext<'_, '_, MInst, Flags, IsaFlags, 6> {
+impl generated_code::Context for IsleContext<'_, '_, MInst, S390xBackend> {
     isle_lower_prelude_methods!();
 
     #[inline]
@@ -283,7 +266,7 @@ impl generated_code::Context for IsleContext<'_, '_, MInst, Flags, IsaFlags, 6>
     fn lib_call_info(&mut self, info: &LibCallInfo) -> BoxCallInfo {
         let caller_callconv = self.lower_ctx.abi().call_conv(self.lower_ctx.sigs());
-        let callee_callconv = CallConv::for_libcall(&self.flags, caller_callconv);
+        let callee_callconv = CallConv::for_libcall(&self.backend.flags, caller_callconv);
 
         // Clobbers are defined by the calling convention. Remove defs from clobbers.
         let mut clobbers = S390xMachineDeps::get_regs_clobbered_by_call(callee_callconv);
@@ -310,7 +293,7 @@
 
     #[inline]
     fn allow_div_traps(&mut self, _: Type) -> Option<()> {
-        if !self.flags.avoid_div_traps() {
+        if !self.backend.flags.avoid_div_traps() {
             Some(())
         } else {
             None
@@ -319,7 +302,7 @@
 
     #[inline]
     fn mie2_enabled(&mut self, _: Type) -> Option<()> {
-        if self.isa_flags.has_mie2() {
+        if self.backend.isa_flags.has_mie2() {
             Some(())
         } else {
             None
@@ -328,7 +311,7 @@
 
     #[inline]
     fn mie2_disabled(&mut self, _: Type) -> Option<()> {
-        if !self.isa_flags.has_mie2() {
+        if !self.backend.isa_flags.has_mie2() {
             Some(())
         } else {
             None
@@ -337,7 +320,7 @@
 
     #[inline]
     fn vxrs_ext2_enabled(&mut self, _: Type) -> Option<()> {
-        if self.isa_flags.has_vxrs_ext2() {
+        if self.backend.isa_flags.has_vxrs_ext2() {
             Some(())
         } else {
             None
@@ -346,7 +329,7 @@
 
     #[inline]
     fn vxrs_ext2_disabled(&mut self, _: Type) -> Option<()> {
-        if !self.isa_flags.has_vxrs_ext2() {
+        if !self.backend.isa_flags.has_vxrs_ext2() {
             Some(())
         } else {
             None
diff --git a/cranelift/codegen/src/isa/x64/abi.rs b/cranelift/codegen/src/isa/x64/abi.rs
index 7a88850c90..cb8a30d0c0 100644
--- a/cranelift/codegen/src/isa/x64/abi.rs
+++ b/cranelift/codegen/src/isa/x64/abi.rs
@@ -637,12 +637,7 @@ impl ABIMachineSpec for X64ABIMachineSpec {
         let arg2 = get_intreg_for_arg(&call_conv, 2, 2).unwrap();
         let temp = alloc_tmp(Self::word_type());
         let temp2 = alloc_tmp(Self::word_type());
-        insts.extend(
-            Inst::gen_constant(ValueRegs::one(temp), size as u128, I64, |_| {
-                panic!("tmp should not be needed")
-            })
-            .into_iter(),
-        );
+        insts.push(Inst::imm(OperandSize::Size64, size as u64, temp));
         // We use an indirect call and a full LoadExtName because we do not have
         // information about the libcall `RelocDistance` here, so we
         // conservatively use the more flexible calling sequence.
diff --git a/cranelift/codegen/src/isa/x64/inst/mod.rs b/cranelift/codegen/src/isa/x64/inst/mod.rs
index 0357d81ac9..ade4ef0a83 100644
--- a/cranelift/codegen/src/isa/x64/inst/mod.rs
+++ b/cranelift/codegen/src/isa/x64/inst/mod.rs
@@ -2336,110 +2336,6 @@ impl MachInst for Inst {
         Inst::jmp_known(label)
     }
 
-    fn gen_constant<F: FnMut(Type) -> Writable<Reg>>(
-        to_regs: ValueRegs<Writable<Reg>>,
-        value: u128,
-        ty: Type,
-        mut alloc_tmp: F,
-    ) -> SmallVec<[Self; 4]> {
-        let mut ret = SmallVec::new();
-        if ty == types::I128 {
-            let lo = value as u64;
-            let hi = (value >> 64) as u64;
-            let lo_reg = to_regs.regs()[0];
-            let hi_reg = to_regs.regs()[1];
-            if lo == 0 {
-                ret.push(Inst::alu_rmi_r(
-                    OperandSize::Size64,
-                    AluRmiROpcode::Xor,
-                    RegMemImm::reg(lo_reg.to_reg()),
-                    lo_reg,
-                ));
-            } else {
-                ret.push(Inst::imm(OperandSize::Size64, lo, lo_reg));
-            }
-            if hi == 0 {
-                ret.push(Inst::alu_rmi_r(
-                    OperandSize::Size64,
-                    AluRmiROpcode::Xor,
-                    RegMemImm::reg(hi_reg.to_reg()),
-                    hi_reg,
-                ));
-            } else {
-                ret.push(Inst::imm(OperandSize::Size64, hi, hi_reg));
-            }
-        } else {
-            let to_reg = to_regs
-                .only_reg()
-                .expect("multi-reg values not supported on x64");
-            if ty == types::F32 {
-                if value == 0 {
-                    ret.push(Inst::xmm_rm_r(
-                        SseOpcode::Xorps,
-                        RegMem::reg(to_reg.to_reg()),
-                        to_reg,
-                    ));
-                } else {
-                    let tmp = alloc_tmp(types::I32);
-                    ret.push(Inst::imm(OperandSize::Size32, value as u64, tmp));
-
-                    ret.push(Inst::gpr_to_xmm(
-                        SseOpcode::Movd,
-                        RegMem::reg(tmp.to_reg()),
-                        OperandSize::Size32,
-                        to_reg,
-                    ));
-                }
-            } else if ty == types::F64 {
-                if value == 0 {
-                    ret.push(Inst::xmm_rm_r(
-                        SseOpcode::Xorpd,
-                        RegMem::reg(to_reg.to_reg()),
-                        to_reg,
-                    ));
-                } else {
-                    let tmp = alloc_tmp(types::I64);
-                    ret.push(Inst::imm(OperandSize::Size64, value as u64, tmp));
-
-                    ret.push(Inst::gpr_to_xmm(
-                        SseOpcode::Movq,
-                        RegMem::reg(tmp.to_reg()),
-                        OperandSize::Size64,
-                        to_reg,
-                    ));
-                }
-            } else {
-                // Must be an integer type.
-                debug_assert!(
-                    ty == types::I8
-                        || ty == types::I16
-                        || ty == types::I32
-                        || ty == types::I64
-                        || ty == types::R32
-                        || ty == types::R64
-                );
-                // Immediates must be 32 or 64 bits.
-                // Smaller types are widened.
-                let size = match OperandSize::from_ty(ty) {
-                    OperandSize::Size64 => OperandSize::Size64,
-                    _ => OperandSize::Size32,
-                };
-                if value == 0 {
-                    ret.push(Inst::alu_rmi_r(
-                        size,
-                        AluRmiROpcode::Xor,
-                        RegMemImm::reg(to_reg.to_reg()),
-                        to_reg,
-                    ));
-                } else {
-                    let value = value as u64;
-                    ret.push(Inst::imm(size, value.into(), to_reg));
-                }
-            }
-        }
-        ret
-    }
-
     fn gen_dummy_use(reg: Reg) -> Self {
         Inst::DummyUse { reg }
     }
diff --git a/cranelift/codegen/src/isa/x64/lower.rs b/cranelift/codegen/src/isa/x64/lower.rs
index 7224b64baf..64e72c23ef 100644
--- a/cranelift/codegen/src/isa/x64/lower.rs
+++ b/cranelift/codegen/src/isa/x64/lower.rs
@@ -7,13 +7,13 @@ use crate::ir::{types, ExternalName, Inst as IRInst, LibCall, Opcode, Type};
 use crate::isa::x64::abi::*;
 use crate::isa::x64::inst::args::*;
 use crate::isa::x64::inst::*;
-use crate::isa::{x64::settings as x64_settings, x64::X64Backend, CallConv};
+use crate::isa::{x64::X64Backend, CallConv};
 use crate::machinst::abi::SmallInstVec;
 use crate::machinst::lower::*;
 use crate::machinst::*;
 use crate::result::CodegenResult;
 use crate::settings::Flags;
-use smallvec::{smallvec, SmallVec};
+use smallvec::smallvec;
 use target_lexicon::Triple;
 
 //=============================================================================
@@ -41,27 +41,6 @@ fn matches_input(ctx: &mut Lower<Inst>, input: InsnInput, op: Opcode) -> Option<
     })
 }
 
-/// Emits instruction(s) to generate the given 64-bit constant value into a newly-allocated
-/// temporary register, returning that register.
-fn generate_constant(ctx: &mut Lower<Inst>, ty: Type, c: u64) -> ValueRegs<Reg> {
-    let from_bits = ty_bits(ty);
-    let masked = if from_bits < 64 {
-        c & ((1u64 << from_bits) - 1)
-    } else {
-        c
-    };
-
-    let cst_copy = ctx.alloc_tmp(ty);
-    for inst in Inst::gen_constant(cst_copy, masked as u128, ty, |ty| {
-        ctx.alloc_tmp(ty).only_reg().unwrap()
-    })
-    .into_iter()
-    {
-        ctx.emit(inst);
-    }
-    non_writable_value_regs(cst_copy)
-}
-
 /// Put the given input into possibly multiple registers, and mark it as used (side-effect).
 fn put_input_in_regs(ctx: &mut Lower<Inst>, spec: InsnInput) -> ValueRegs<Reg> {
     let ty = ctx.input_ty(spec.insn, spec.input);
@@ -69,7 +48,16 @@
 
     if let Some(c) = input.constant {
         // Generate constants fresh at each use to minimize long-range register pressure.
-        generate_constant(ctx, ty, c)
+        let from_bits = ty_bits(ty);
+        let (size, c) = if from_bits < 64 {
+            (OperandSize::Size32, c & ((1u64 << from_bits) - 1))
+        } else {
+            (OperandSize::Size64, c)
+        };
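+        // E.g. an `I8` constant is masked to its low 8 bits and then
+        // materialized with a 32-bit move, which zeroes the upper bits.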
-fn lower_insn_to_regs( - ctx: &mut Lower, - insn: IRInst, - flags: &Flags, - isa_flags: &x64_settings::Flags, - triple: &Triple, -) -> CodegenResult<()> { - let outputs: SmallVec<[InsnOutput; 2]> = (0..ctx.num_outputs(insn)) - .map(|i| InsnOutput { insn, output: i }) - .collect(); - - if let Ok(()) = isle::lower(ctx, triple, flags, isa_flags, &outputs, insn) { - return Ok(()); - } - - let op = ctx.data(insn).opcode(); - match op { - Opcode::Iconst - | Opcode::F32const - | Opcode::F64const - | Opcode::Null - | Opcode::Iadd - | Opcode::IaddCout - | Opcode::SaddSat - | Opcode::UaddSat - | Opcode::Isub - | Opcode::SsubSat - | Opcode::UsubSat - | Opcode::AvgRound - | Opcode::Band - | Opcode::Bor - | Opcode::Bxor - | Opcode::Imul - | Opcode::BandNot - | Opcode::Iabs - | Opcode::Smax - | Opcode::Umax - | Opcode::Smin - | Opcode::Umin - | Opcode::Bnot - | Opcode::Bitselect - | Opcode::Vselect - | Opcode::Ushr - | Opcode::Sshr - | Opcode::Ishl - | Opcode::Rotl - | Opcode::Rotr - | Opcode::Ineg - | Opcode::Trap - | Opcode::ResumableTrap - | Opcode::UaddOverflowTrap - | Opcode::Clz - | Opcode::Ctz - | Opcode::Popcnt - | Opcode::Bitrev - | Opcode::Bswap - | Opcode::IsNull - | Opcode::IsInvalid - | Opcode::Uextend - | Opcode::Sextend - | Opcode::Ireduce - | Opcode::Debugtrap - | Opcode::WideningPairwiseDotProductS - | Opcode::Fadd - | Opcode::Fsub - | Opcode::Fmul - | Opcode::Fdiv - | Opcode::Fmin - | Opcode::Fmax - | Opcode::FminPseudo - | Opcode::FmaxPseudo - | Opcode::Sqrt - | Opcode::Fpromote - | Opcode::FvpromoteLow - | Opcode::Fdemote - | Opcode::Fvdemote - | Opcode::Fma - | Opcode::Icmp - | Opcode::Fcmp - | Opcode::Load - | Opcode::Uload8 - | Opcode::Sload8 - | Opcode::Uload16 - | Opcode::Sload16 - | Opcode::Uload32 - | Opcode::Sload32 - | Opcode::Sload8x8 - | Opcode::Uload8x8 - | Opcode::Sload16x4 - | Opcode::Uload16x4 - | Opcode::Sload32x2 - | Opcode::Uload32x2 - | Opcode::Store - | Opcode::Istore8 - | Opcode::Istore16 - | Opcode::Istore32 - | Opcode::AtomicRmw - | Opcode::AtomicCas - | Opcode::AtomicLoad - | Opcode::AtomicStore - | Opcode::Fence - | Opcode::FuncAddr - | Opcode::SymbolValue - | Opcode::Return - | Opcode::Call - | Opcode::CallIndirect - | Opcode::GetFramePointer - | Opcode::GetStackPointer - | Opcode::GetReturnAddress - | Opcode::Select - | Opcode::SelectSpectreGuard - | Opcode::FcvtFromSint - | Opcode::FcvtLowFromSint - | Opcode::FcvtFromUint - | Opcode::FcvtToUint - | Opcode::FcvtToSint - | Opcode::FcvtToUintSat - | Opcode::FcvtToSintSat - | Opcode::IaddPairwise - | Opcode::UwidenHigh - | Opcode::UwidenLow - | Opcode::SwidenHigh - | Opcode::SwidenLow - | Opcode::Snarrow - | Opcode::Unarrow - | Opcode::Bitcast - | Opcode::Fabs - | Opcode::Fneg - | Opcode::Fcopysign - | Opcode::Ceil - | Opcode::Floor - | Opcode::Nearest - | Opcode::Trunc - | Opcode::StackAddr - | Opcode::Udiv - | Opcode::Urem - | Opcode::Sdiv - | Opcode::Srem - | Opcode::Umulhi - | Opcode::Smulhi - | Opcode::GetPinnedReg - | Opcode::SetPinnedReg - | Opcode::Vconst - | Opcode::Insertlane - | Opcode::Shuffle - | Opcode::Swizzle - | Opcode::Extractlane - | Opcode::ScalarToVector - | Opcode::Splat - | Opcode::VanyTrue - | Opcode::VallTrue - | Opcode::VhighBits - | Opcode::Iconcat - | Opcode::Isplit - | Opcode::TlsValue - | Opcode::SqmulRoundSat - | Opcode::Uunarrow - | Opcode::Nop - | Opcode::Bmask => { - let ty = if outputs.len() > 0 { - Some(ctx.output_ty(insn, 0)) - } else { - None - }; - - unreachable!( - "implemented in ISLE: inst = `{}`, type = `{:?}`", - ctx.dfg().display_inst(insn), - ty - ) - } - - 
Opcode::DynamicStackAddr => unimplemented!("DynamicStackAddr"), - - // Unimplemented opcodes below. These are not currently used by Wasm - // lowering or other known embeddings, but should be either supported or - // removed eventually - Opcode::ExtractVector => { - unimplemented!("ExtractVector not supported"); - } - - Opcode::Cls => unimplemented!("Cls not supported"), - - Opcode::BorNot | Opcode::BxorNot => { - unimplemented!("or-not / xor-not opcodes not implemented"); - } - - Opcode::Vsplit | Opcode::Vconcat => { - unimplemented!("Vector split/concat ops not implemented."); - } - - Opcode::IaddImm - | Opcode::ImulImm - | Opcode::UdivImm - | Opcode::SdivImm - | Opcode::UremImm - | Opcode::SremImm - | Opcode::IrsubImm - | Opcode::IaddCin - | Opcode::IaddCarry - | Opcode::IsubBin - | Opcode::IsubBout - | Opcode::IsubBorrow - | Opcode::BandImm - | Opcode::BorImm - | Opcode::BxorImm - | Opcode::RotlImm - | Opcode::RotrImm - | Opcode::IshlImm - | Opcode::UshrImm - | Opcode::SshrImm - | Opcode::IcmpImm => { - panic!("ALU+imm and ALU+carry ops should not appear here!"); - } - - Opcode::StackLoad - | Opcode::StackStore - | Opcode::DynamicStackStore - | Opcode::DynamicStackLoad => { - panic!("Direct stack memory access not supported; should have been legalized"); - } - - Opcode::GlobalValue => { - panic!("global_value should have been removed by legalization!"); - } - - Opcode::HeapLoad | Opcode::HeapStore | Opcode::HeapAddr => { - panic!("heap access instructions should have been removed by legalization!"); - } - - Opcode::TableAddr => { - panic!("table_addr should have been removed by legalization!"); - } - - Opcode::Trapz | Opcode::Trapnz | Opcode::ResumableTrapnz => { - panic!("trapz / trapnz / resumable_trapnz should have been removed by legalization!"); - } - - Opcode::Jump | Opcode::Brz | Opcode::Brnz | Opcode::BrTable => { - panic!("Branch opcode reached non-branch lowering logic!"); - } - } -} - //============================================================================= // Lowering-backend trait implementation. 
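// Note the new shape of this impl: instead of writing results into
// pre-assigned output registers via `lower_insn_to_regs`, `lower` now
// returns a `CodegenResult<InstOutput>` holding one `ValueRegs<Reg>` per
// CLIF result. The machinst driver (see machinst/lower.rs below) then
// aliases each result's pre-assigned vregs to the registers returned here.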
impl LowerBackend for X64Backend { type MInst = Inst; - fn lower(&self, ctx: &mut Lower, ir_inst: IRInst) -> CodegenResult<()> { - lower_insn_to_regs(ctx, ir_inst, &self.flags, &self.x64_flags, &self.triple) + fn lower(&self, ctx: &mut Lower, ir_inst: IRInst) -> CodegenResult { + if let Some(temp_regs) = isle::lower(ctx, self, ir_inst) { + return Ok(temp_regs); + } + + let op = ctx.data(ir_inst).opcode(); + let ty = if ctx.num_outputs(ir_inst) > 0 { + Some(ctx.output_ty(ir_inst, 0)) + } else { + None + }; + + match op { + Opcode::Iconst + | Opcode::F32const + | Opcode::F64const + | Opcode::Null + | Opcode::Iadd + | Opcode::IaddCout + | Opcode::SaddSat + | Opcode::UaddSat + | Opcode::Isub + | Opcode::SsubSat + | Opcode::UsubSat + | Opcode::AvgRound + | Opcode::Band + | Opcode::Bor + | Opcode::Bxor + | Opcode::Imul + | Opcode::BandNot + | Opcode::Iabs + | Opcode::Smax + | Opcode::Umax + | Opcode::Smin + | Opcode::Umin + | Opcode::Bnot + | Opcode::Bitselect + | Opcode::Vselect + | Opcode::Ushr + | Opcode::Sshr + | Opcode::Ishl + | Opcode::Rotl + | Opcode::Rotr + | Opcode::Ineg + | Opcode::Trap + | Opcode::ResumableTrap + | Opcode::UaddOverflowTrap + | Opcode::Clz + | Opcode::Ctz + | Opcode::Popcnt + | Opcode::Bitrev + | Opcode::Bswap + | Opcode::IsNull + | Opcode::IsInvalid + | Opcode::Uextend + | Opcode::Sextend + | Opcode::Ireduce + | Opcode::Debugtrap + | Opcode::WideningPairwiseDotProductS + | Opcode::Fadd + | Opcode::Fsub + | Opcode::Fmul + | Opcode::Fdiv + | Opcode::Fmin + | Opcode::Fmax + | Opcode::FminPseudo + | Opcode::FmaxPseudo + | Opcode::Sqrt + | Opcode::Fpromote + | Opcode::FvpromoteLow + | Opcode::Fdemote + | Opcode::Fvdemote + | Opcode::Fma + | Opcode::Icmp + | Opcode::Fcmp + | Opcode::Load + | Opcode::Uload8 + | Opcode::Sload8 + | Opcode::Uload16 + | Opcode::Sload16 + | Opcode::Uload32 + | Opcode::Sload32 + | Opcode::Sload8x8 + | Opcode::Uload8x8 + | Opcode::Sload16x4 + | Opcode::Uload16x4 + | Opcode::Sload32x2 + | Opcode::Uload32x2 + | Opcode::Store + | Opcode::Istore8 + | Opcode::Istore16 + | Opcode::Istore32 + | Opcode::AtomicRmw + | Opcode::AtomicCas + | Opcode::AtomicLoad + | Opcode::AtomicStore + | Opcode::Fence + | Opcode::FuncAddr + | Opcode::SymbolValue + | Opcode::Return + | Opcode::Call + | Opcode::CallIndirect + | Opcode::GetFramePointer + | Opcode::GetStackPointer + | Opcode::GetReturnAddress + | Opcode::Select + | Opcode::SelectSpectreGuard + | Opcode::FcvtFromSint + | Opcode::FcvtLowFromSint + | Opcode::FcvtFromUint + | Opcode::FcvtToUint + | Opcode::FcvtToSint + | Opcode::FcvtToUintSat + | Opcode::FcvtToSintSat + | Opcode::IaddPairwise + | Opcode::UwidenHigh + | Opcode::UwidenLow + | Opcode::SwidenHigh + | Opcode::SwidenLow + | Opcode::Snarrow + | Opcode::Unarrow + | Opcode::Bitcast + | Opcode::Fabs + | Opcode::Fneg + | Opcode::Fcopysign + | Opcode::Ceil + | Opcode::Floor + | Opcode::Nearest + | Opcode::Trunc + | Opcode::StackAddr + | Opcode::Udiv + | Opcode::Urem + | Opcode::Sdiv + | Opcode::Srem + | Opcode::Umulhi + | Opcode::Smulhi + | Opcode::GetPinnedReg + | Opcode::SetPinnedReg + | Opcode::Vconst + | Opcode::Insertlane + | Opcode::Shuffle + | Opcode::Swizzle + | Opcode::Extractlane + | Opcode::ScalarToVector + | Opcode::Splat + | Opcode::VanyTrue + | Opcode::VallTrue + | Opcode::VhighBits + | Opcode::Iconcat + | Opcode::Isplit + | Opcode::TlsValue + | Opcode::SqmulRoundSat + | Opcode::Uunarrow + | Opcode::Nop + | Opcode::Bmask => { + unreachable!( + "implemented in ISLE: inst = `{}`, type = `{:?}`", + ctx.dfg().display_inst(ir_inst), + ty + ) + } + + 
Opcode::DynamicStackAddr => unimplemented!("DynamicStackAddr"), + + // Unimplemented opcodes below. These are not currently used by Wasm + // lowering or other known embeddings, but should be either supported or + // removed eventually + Opcode::ExtractVector => { + unimplemented!("ExtractVector not supported"); + } + + Opcode::Cls => unimplemented!("Cls not supported"), + + Opcode::BorNot | Opcode::BxorNot => { + unimplemented!("or-not / xor-not opcodes not implemented"); + } + + Opcode::Vsplit | Opcode::Vconcat => { + unimplemented!("Vector split/concat ops not implemented."); + } + + Opcode::IaddImm + | Opcode::ImulImm + | Opcode::UdivImm + | Opcode::SdivImm + | Opcode::UremImm + | Opcode::SremImm + | Opcode::IrsubImm + | Opcode::IaddCin + | Opcode::IaddCarry + | Opcode::IsubBin + | Opcode::IsubBout + | Opcode::IsubBorrow + | Opcode::BandImm + | Opcode::BorImm + | Opcode::BxorImm + | Opcode::RotlImm + | Opcode::RotrImm + | Opcode::IshlImm + | Opcode::UshrImm + | Opcode::SshrImm + | Opcode::IcmpImm => { + panic!("ALU+imm and ALU+carry ops should not appear here!"); + } + + Opcode::StackLoad + | Opcode::StackStore + | Opcode::DynamicStackStore + | Opcode::DynamicStackLoad => { + panic!("Direct stack memory access not supported; should have been legalized"); + } + + Opcode::GlobalValue => { + panic!("global_value should have been removed by legalization!"); + } + + Opcode::HeapLoad | Opcode::HeapStore | Opcode::HeapAddr => { + panic!("heap access instructions should have been removed by legalization!"); + } + + Opcode::TableAddr => { + panic!("table_addr should have been removed by legalization!"); + } + + Opcode::Trapz | Opcode::Trapnz | Opcode::ResumableTrapnz => { + panic!( + "trapz / trapnz / resumable_trapnz should have been removed by legalization!" 
+ ); + } + + Opcode::Jump | Opcode::Brz | Opcode::Brnz | Opcode::BrTable => { + panic!("Branch opcode reached non-branch lowering logic!"); + } + } } fn lower_branch_group( @@ -581,14 +553,8 @@ impl LowerBackend for X64Backend { assert!(op1 == Opcode::Jump); } - if let Ok(()) = isle::lower_branch( - ctx, - &self.triple, - &self.flags, - &self.x64_flags, - branches[0], - targets, - ) { + if let Some(temp_regs) = isle::lower_branch(ctx, self, branches[0], targets) { + assert!(temp_regs.len() == 0); return Ok(()); } diff --git a/cranelift/codegen/src/isa/x64/lower/isle.rs b/cranelift/codegen/src/isa/x64/lower/isle.rs index 1fa9d652a4..a4f52e6969 100644 --- a/cranelift/codegen/src/isa/x64/lower/isle.rs +++ b/cranelift/codegen/src/isa/x64/lower/isle.rs @@ -14,6 +14,7 @@ use generated_code::{Context, MInst, RegisterClass}; use super::{is_int_or_ref_ty, is_mergeable_load, lower_to_amode}; use crate::ir::LibCall; use crate::isa::x64::lower::emit_vm_call; +use crate::isa::x64::X64Backend; use crate::{ ir::{ condcodes::{CondCode, FloatCC, IntCC}, @@ -22,16 +23,14 @@ use crate::{ Inst, InstructionData, MemFlags, Opcode, TrapCode, Value, ValueList, }, isa::{ - settings::Flags, unwind::UnwindInst, x64::{ abi::X64Caller, inst::{args::*, regs, CallInfo}, - settings::Flags as IsaFlags, }, }, machinst::{ - isle::*, valueregs, ArgPair, InsnInput, InsnOutput, Lower, MachAtomicRmwOp, MachInst, + isle::*, valueregs, ArgPair, InsnInput, InstOutput, Lower, MachAtomicRmwOp, MachInst, VCodeConstant, VCodeConstantData, }, }; @@ -40,7 +39,6 @@ use regalloc2::PReg; use smallvec::SmallVec; use std::boxed::Box; use std::convert::TryFrom; -use target_lexicon::Triple; type BoxCallInfo = Box; type BoxVecMachLabel = Box>; @@ -56,43 +54,28 @@ pub struct SinkableLoad { /// The main entry point for lowering with ISLE. pub(crate) fn lower( lower_ctx: &mut Lower, - triple: &Triple, - flags: &Flags, - isa_flags: &IsaFlags, - outputs: &[InsnOutput], + backend: &X64Backend, inst: Inst, -) -> Result<(), ()> { - lower_common( - lower_ctx, - triple, - flags, - isa_flags, - outputs, - inst, - |cx, insn| generated_code::constructor_lower(cx, insn), - ) +) -> Option { + // TODO: reuse the ISLE context across lowerings so we can reuse its + // internal heap allocations. + let mut isle_ctx = IsleContext { lower_ctx, backend }; + generated_code::constructor_lower(&mut isle_ctx, inst) } pub(crate) fn lower_branch( lower_ctx: &mut Lower, - triple: &Triple, - flags: &Flags, - isa_flags: &IsaFlags, + backend: &X64Backend, branch: Inst, targets: &[MachLabel], -) -> Result<(), ()> { - lower_common( - lower_ctx, - triple, - flags, - isa_flags, - &[], - branch, - |cx, insn| generated_code::constructor_lower_branch(cx, insn, targets), - ) +) -> Option { + // TODO: reuse the ISLE context across lowerings so we can reuse its + // internal heap allocations. 
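// Branches define no SSA values, so the `InstOutput` built here is
// expected to be empty; the caller in x64/lower.rs asserts
// `temp_regs.len() == 0` before returning.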
+ let mut isle_ctx = IsleContext { lower_ctx, backend }; + generated_code::constructor_lower_branch(&mut isle_ctx, branch, &targets.to_vec()) } -impl Context for IsleContext<'_, '_, MInst, Flags, IsaFlags, 6> { +impl Context for IsleContext<'_, '_, MInst, X64Backend> { isle_lower_prelude_methods!(); isle_prelude_caller_methods!(X64ABIMachineSpec, X64Caller); @@ -204,52 +187,52 @@ impl Context for IsleContext<'_, '_, MInst, Flags, IsaFlags, 6> { #[inline] fn avx512vl_enabled(&mut self, _: Type) -> bool { - self.isa_flags.use_avx512vl_simd() + self.backend.x64_flags.use_avx512vl_simd() } #[inline] fn avx512dq_enabled(&mut self, _: Type) -> bool { - self.isa_flags.use_avx512dq_simd() + self.backend.x64_flags.use_avx512dq_simd() } #[inline] fn avx512f_enabled(&mut self, _: Type) -> bool { - self.isa_flags.use_avx512f_simd() + self.backend.x64_flags.use_avx512f_simd() } #[inline] fn avx512bitalg_enabled(&mut self, _: Type) -> bool { - self.isa_flags.use_avx512bitalg_simd() + self.backend.x64_flags.use_avx512bitalg_simd() } #[inline] fn avx512vbmi_enabled(&mut self, _: Type) -> bool { - self.isa_flags.use_avx512vbmi_simd() + self.backend.x64_flags.use_avx512vbmi_simd() } #[inline] fn use_lzcnt(&mut self, _: Type) -> bool { - self.isa_flags.use_lzcnt() + self.backend.x64_flags.use_lzcnt() } #[inline] fn use_bmi1(&mut self, _: Type) -> bool { - self.isa_flags.use_bmi1() + self.backend.x64_flags.use_bmi1() } #[inline] fn use_popcnt(&mut self, _: Type) -> bool { - self.isa_flags.use_popcnt() + self.backend.x64_flags.use_popcnt() } #[inline] fn use_fma(&mut self, _: Type) -> bool { - self.isa_flags.use_fma() + self.backend.x64_flags.use_fma() } #[inline] fn use_sse41(&mut self, _: Type) -> bool { - self.isa_flags.use_sse41() + self.backend.x64_flags.use_sse41() } #[inline] @@ -647,8 +630,8 @@ impl Context for IsleContext<'_, '_, MInst, Flags, IsaFlags, 6> { emit_vm_call( self.lower_ctx, - self.flags, - self.triple, + &self.backend.flags, + &self.backend.triple, libcall.clone(), &[a], &[output_reg], @@ -665,8 +648,8 @@ impl Context for IsleContext<'_, '_, MInst, Flags, IsaFlags, 6> { emit_vm_call( self.lower_ctx, - self.flags, - self.triple, + &self.backend.flags, + &self.backend.triple, libcall.clone(), &[a, b, c], &[output_reg], @@ -884,7 +867,7 @@ impl Context for IsleContext<'_, '_, MInst, Flags, IsaFlags, 6> { let dst_remainder = self.lower_ctx.alloc_tmp(types::I64).only_reg().unwrap(); // Always do explicit checks for `srem`: otherwise, INT_MIN % -1 is not handled properly. - if self.flags.avoid_div_traps() || *kind == DivOrRemKind::SignedRem { + if self.backend.flags.avoid_div_traps() || *kind == DivOrRemKind::SignedRem { // A vcode meta-instruction is used to lower the inline checks, since they embed // pc-relative offsets that must not change, thus requiring regalloc to not // interfere by introducing spills and reloads. @@ -1003,7 +986,7 @@ impl Context for IsleContext<'_, '_, MInst, Flags, IsaFlags, 6> { } } -impl IsleContext<'_, '_, MInst, Flags, IsaFlags, 6> { +impl IsleContext<'_, '_, MInst, X64Backend> { isle_prelude_method_helpers!(X64Caller); } diff --git a/cranelift/codegen/src/machinst/compile.rs b/cranelift/codegen/src/machinst/compile.rs index 8ea78880e4..7ecd0a9adf 100644 --- a/cranelift/codegen/src/machinst/compile.rs +++ b/cranelift/codegen/src/machinst/compile.rs @@ -23,15 +23,7 @@ pub fn compile( let block_order = BlockLoweringOrder::new(f); // Build the lowering context. 
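// `Lower::new` drops its `Flags` parameter in this patch: flags are no
// longer stored on the lowering context and are instead reached through
// the backend (e.g. `self.backend.x64_flags` above, or
// `self.backend.flags()` in machinst/isle.rs below) whenever a rule
// needs them.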
- let lower = crate::machinst::Lower::new( - f, - b.flags().clone(), - machine_env, - abi, - emit_info, - block_order, - sigs, - )?; + let lower = crate::machinst::Lower::new(f, machine_env, abi, emit_info, block_order, sigs)?; // Lower the IR. let vcode = { diff --git a/cranelift/codegen/src/machinst/helpers.rs b/cranelift/codegen/src/machinst/helpers.rs index ff5c1af0c6..81b3143662 100644 --- a/cranelift/codegen/src/machinst/helpers.rs +++ b/cranelift/codegen/src/machinst/helpers.rs @@ -1,7 +1,5 @@ //! Miscellaneous helpers for machine backends. -use super::{InsnOutput, Lower, VCodeInst, ValueRegs}; -use super::{Reg, Writable}; use crate::ir::Type; use std::ops::{Add, BitAnd, Not, Sub}; @@ -20,14 +18,6 @@ pub(crate) fn ty_has_float_or_vec_representation(ty: Type) -> bool { ty.is_vector() || ty.is_float() } -/// Allocate a register for an instruction output and return it. -pub(crate) fn get_output_reg( - ctx: &mut Lower, - spec: InsnOutput, -) -> ValueRegs> { - ctx.get_output(spec.insn, spec.output) -} - /// Align a size up to a power-of-two alignment. pub(crate) fn align_to(x: N, alignment: N) -> N where diff --git a/cranelift/codegen/src/machinst/inst_common.rs b/cranelift/codegen/src/machinst/inst_common.rs index 7c693c38a6..daab9dc531 100644 --- a/cranelift/codegen/src/machinst/inst_common.rs +++ b/cranelift/codegen/src/machinst/inst_common.rs @@ -1,8 +1,6 @@ //! A place to park MachInst::Inst fragments which are common across multiple architectures. -use super::{Lower, VCodeInst}; use crate::ir::{self, Inst as IRInst}; -use smallvec::SmallVec; //============================================================================ // Instruction input "slots". @@ -24,15 +22,6 @@ pub(crate) struct InsnOutput { pub(crate) output: usize, } -pub(crate) fn insn_outputs( - ctx: &Lower, - insn: IRInst, -) -> SmallVec<[InsnOutput; 4]> { - (0..ctx.num_outputs(insn)) - .map(|i| InsnOutput { insn, output: i }) - .collect() -} - //============================================================================ // Atomic instructions. diff --git a/cranelift/codegen/src/machinst/isle.rs b/cranelift/codegen/src/machinst/isle.rs index b151445472..8d861c8f6e 100644 --- a/cranelift/codegen/src/machinst/isle.rs +++ b/cranelift/codegen/src/machinst/isle.rs @@ -1,10 +1,8 @@ -use crate::ir::{Inst, Value, ValueList}; -use crate::machinst::{get_output_reg, InsnOutput}; +use crate::ir::{Value, ValueList}; use alloc::boxed::Box; use alloc::vec::Vec; use smallvec::SmallVec; use std::cell::Cell; -use target_lexicon::Triple; pub use super::MachLabel; use super::RetPair; @@ -13,9 +11,10 @@ pub use crate::ir::{ DynamicStackSlot, ExternalName, FuncRef, GlobalValue, Immediate, SigRef, StackSlot, }; pub use crate::isa::unwind::UnwindInst; +pub use crate::isa::TargetIsa; pub use crate::machinst::{ - ABIArg, ABIArgSlot, InputSourceInst, Lower, RealReg, Reg, RelocDistance, Sig, VCodeInst, - Writable, + ABIArg, ABIArgSlot, InputSourceInst, Lower, LowerBackend, RealReg, Reg, RelocDistance, Sig, + VCodeInst, Writable, }; pub use crate::settings::TlsModel; @@ -123,11 +122,32 @@ macro_rules! isle_lower_prelude_methods { #[inline] fn put_in_reg(&mut self, val: Value) -> Reg { - self.lower_ctx.put_value_in_regs(val).only_reg().unwrap() + self.put_in_regs(val).only_reg().unwrap() } #[inline] fn put_in_regs(&mut self, val: Value) -> ValueRegs { + // If the value is a constant, then (re)materialize it at each + // use. This lowers register pressure. 
(Only do this if we are + // not using egraph-based compilation; the egraph framework + // more efficiently rematerializes constants where needed.) + if !self.backend.flags().use_egraphs() { + let inputs = self.lower_ctx.get_value_as_source_or_const(val); + if inputs.constant.is_some() { + let insn = match inputs.inst { + InputSourceInst::UniqueUse(insn, 0) => Some(insn), + InputSourceInst::Use(insn, 0) => Some(insn), + _ => None, + }; + if let Some(insn) = insn { + if let Ok(regs) = self.backend.lower(self.lower_ctx, insn) { + assert!(regs.len() == 1); + return regs[0]; + } + } + } + } + self.lower_ctx.put_value_in_regs(val) } @@ -263,7 +283,7 @@ macro_rules! isle_lower_prelude_methods { } fn avoid_div_traps(&mut self, _: Type) -> Option<()> { - if self.flags.avoid_div_traps() { + if self.backend.flags().avoid_div_traps() { Some(()) } else { None @@ -272,12 +292,12 @@ macro_rules! isle_lower_prelude_methods { #[inline] fn tls_model(&mut self, _: Type) -> TlsModel { - self.flags.tls_model() + self.backend.flags().tls_model() } #[inline] fn tls_model_is_elf_gd(&mut self) -> Option<()> { - if self.flags.tls_model() == TlsModel::ElfGd { + if self.backend.flags().tls_model() == TlsModel::ElfGd { Some(()) } else { None @@ -286,7 +306,7 @@ macro_rules! isle_lower_prelude_methods { #[inline] fn tls_model_is_macho(&mut self) -> Option<()> { - if self.flags.tls_model() == TlsModel::Macho { + if self.backend.flags().tls_model() == TlsModel::Macho { Some(()) } else { None @@ -295,7 +315,7 @@ macro_rules! isle_lower_prelude_methods { #[inline] fn tls_model_is_coff(&mut self) -> Option<()> { - if self.flags.tls_model() == TlsModel::Coff { + if self.backend.flags().tls_model() == TlsModel::Coff { Some(()) } else { None @@ -304,7 +324,7 @@ macro_rules! isle_lower_prelude_methods { #[inline] fn preserve_frame_pointers(&mut self) -> Option<()> { - if self.flags.preserve_frame_pointers() { + if self.backend.flags().preserve_frame_pointers() { Some(()) } else { None @@ -572,7 +592,7 @@ macro_rules! isle_prelude_caller_methods { &extname, dist, caller_conv, - self.flags.clone(), + self.backend.flags().clone(), ) .unwrap(); @@ -601,7 +621,7 @@ macro_rules! isle_prelude_caller_methods { ptr, Opcode::CallIndirect, caller_conv, - self.flags.clone(), + self.backend.flags().clone(), ) .unwrap(); @@ -641,7 +661,7 @@ macro_rules! isle_prelude_method_helpers { let input = inputs .get(off + i, &self.lower_ctx.dfg().value_lists) .unwrap(); - arg_regs.push(self.lower_ctx.put_value_in_regs(input)); + arg_regs.push(self.put_in_regs(input)); } for (i, arg_regs) in arg_regs.iter().enumerate() { caller.emit_copy_regs_to_buffer(self.lower_ctx, i, *arg_regs); @@ -708,77 +728,11 @@ macro_rules! isle_prelude_method_helpers { /// This structure is used to implement the ISLE-generated `Context` trait and /// internally has a temporary reference to a machinst `LowerCtx`. -pub(crate) struct IsleContext<'a, 'b, I, Flags, IsaFlags, const N: usize> +pub(crate) struct IsleContext<'a, 'b, I, B> where I: VCodeInst, - [(I, bool); N]: smallvec::Array, + B: LowerBackend, { pub lower_ctx: &'a mut Lower<'b, I>, - pub triple: &'a Triple, - pub flags: &'a Flags, - pub isa_flags: &'a IsaFlags, -} - -/// Shared lowering code amongst all backends for doing ISLE-based lowering. -/// -/// The `isle_lower` argument here is an ISLE-generated function for `lower` and -/// then this function otherwise handles register mapping and such around the -/// lowering. 
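// (Neither half of `lower_common` disappears entirely: the ISLE-invocation
// half moves into each backend's `isle::lower` entry point, and the
// register-aliasing half moves into the machinst driver in
// machinst/lower.rs below.)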
-pub(crate) fn lower_common( - lower_ctx: &mut Lower, - triple: &Triple, - flags: &Flags, - isa_flags: &IsaFlags, - outputs: &[InsnOutput], - inst: Inst, - isle_lower: IsleFunction, -) -> Result<(), ()> -where - I: VCodeInst, - [(I, bool); N]: smallvec::Array, - IsleFunction: Fn(&mut IsleContext<'_, '_, I, Flags, IsaFlags, N>, Inst) -> Option, -{ - // TODO: reuse the ISLE context across lowerings so we can reuse its - // internal heap allocations. - let mut isle_ctx = IsleContext { - lower_ctx, - triple, - flags, - isa_flags, - }; - - let temp_regs = isle_lower(&mut isle_ctx, inst).ok_or(())?; - - #[cfg(debug_assertions)] - { - debug_assert_eq!( - temp_regs.len(), - outputs.len(), - "the number of temporary values and destination values do \ - not match ({} != {}); ensure the correct registers are being \ - returned.", - temp_regs.len(), - outputs.len(), - ); - } - - // The ISLE generated code emits its own registers to define the - // instruction's lowered values in. However, other instructions - // that use this SSA value will be lowered assuming that the value - // is generated into a pre-assigned, different, register. - // - // To connect the two, we set up "aliases" in the VCodeBuilder - // that apply when it is building the Operand table for the - // regalloc to use. These aliases effectively rewrite any use of - // the pre-assigned register to the register that was returned by - // the ISLE lowering logic. - for i in 0..outputs.len() { - let regs = temp_regs[i]; - let dsts = get_output_reg(isle_ctx.lower_ctx, outputs[i]); - for (dst, temp) in dsts.regs().iter().zip(regs.regs().iter()) { - isle_ctx.lower_ctx.set_vreg_alias(dst.to_reg(), *temp); - } - } - - Ok(()) + pub backend: &'a B, } diff --git a/cranelift/codegen/src/machinst/lower.rs b/cranelift/codegen/src/machinst/lower.rs index ea2680b887..9ffbd6f17e 100644 --- a/cranelift/codegen/src/machinst/lower.rs +++ b/cranelift/codegen/src/machinst/lower.rs @@ -14,9 +14,9 @@ use crate::ir::{ Type, Value, ValueDef, ValueLabelAssignments, ValueLabelStart, }; use crate::machinst::{ - non_writable_value_regs, writable_value_regs, BlockIndex, BlockLoweringOrder, Callee, - LoweredBlock, MachLabel, Reg, SigSet, VCode, VCodeBuilder, VCodeConstant, VCodeConstantData, - VCodeConstants, VCodeInst, ValueRegs, Writable, + writable_value_regs, BlockIndex, BlockLoweringOrder, Callee, LoweredBlock, MachLabel, Reg, + SigSet, VCode, VCodeBuilder, VCodeConstant, VCodeConstantData, VCodeConstants, VCodeInst, + ValueRegs, Writable, }; use crate::{trace, CodegenResult}; use alloc::vec::Vec; @@ -26,6 +26,9 @@ use std::fmt::Debug; use super::{VCodeBuildDirection, VRegAllocator}; +/// A vector of ValueRegs, used to represent the outputs of an instruction. +pub type InstOutput = SmallVec<[ValueRegs; 2]>; + /// An "instruction color" partitions CLIF instructions by side-effecting ops. /// All instructions with the same "color" are guaranteed not to be separated by /// any side-effecting op (for this purpose, loads are also considered @@ -121,7 +124,7 @@ pub trait LowerBackend { /// edge (block-param actuals) into registers, because the actual branch /// generation (`lower_branch_group()`) happens *after* any possible merged /// out-edge. - fn lower(&self, ctx: &mut Lower, inst: Inst) -> CodegenResult<()>; + fn lower(&self, ctx: &mut Lower, inst: Inst) -> CodegenResult; /// Lower a block-terminating group of branches (which together can be seen /// as one N-way branch), given a vcode MachLabel for each target. 
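// A minimal sketch of how a backend populates the new `InstOutput` type
// for a two-result instruction such as `isplit`. This is illustrative
// only; `isplit_outputs`, `lo`, and `hi` are hypothetical names, and the
// registers are assumed to have been defined by the emitted instructions.

fn isplit_outputs(lo: Reg, hi: Reg) -> InstOutput {
    // One `ValueRegs` per CLIF result, in result order; a single-output
    // instruction would return a one-element vector.
    smallvec::smallvec![ValueRegs::one(lo), ValueRegs::one(hi)]
}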
@@ -146,9 +149,6 @@ pub struct Lower<'func, I: VCodeInst> { /// The function to lower. f: &'func Function, - /// Machine-independent flags. - flags: crate::settings::Flags, - /// The set of allocatable registers. allocatable: PRegSet, @@ -324,7 +324,6 @@ impl<'func, I: VCodeInst> Lower<'func, I> { /// Prepare a new lowering context for the given IR function. pub fn new( f: &'func Function, - flags: crate::settings::Flags, machine_env: &MachineEnv, abi: Callee, emit_info: I::Info, @@ -415,7 +414,6 @@ impl<'func, I: VCodeInst> Lower<'func, I> { Ok(Lower { f, - flags, allocatable: PRegSet::from(machine_env), vcode, vregs, @@ -742,7 +740,27 @@ impl<'func, I: VCodeInst> Lower<'func, I> { // or any of its outputs its used. if has_side_effect || value_needed { trace!("lowering: inst {}: {:?}", inst, self.f.dfg[inst]); - backend.lower(self, inst)?; + let temp_regs = backend.lower(self, inst)?; + + // The ISLE generated code emits its own registers to define the + // instruction's lowered values in. However, other instructions + // that use this SSA value will be lowered assuming that the value + // is generated into a pre-assigned, different, register. + // + // To connect the two, we set up "aliases" in the VCodeBuilder + // that apply when it is building the Operand table for the + // regalloc to use. These aliases effectively rewrite any use of + // the pre-assigned register to the register that was returned by + // the ISLE lowering logic. + debug_assert_eq!(temp_regs.len(), self.num_outputs(inst)); + for i in 0..self.num_outputs(inst) { + let regs = temp_regs[i]; + let dsts = self.value_regs[self.f.dfg.inst_results(inst)[i]]; + debug_assert_eq!(regs.len(), dsts.len()); + for (dst, temp) in dsts.regs().iter().zip(regs.regs().iter()) { + self.set_vreg_alias(*dst, *temp); + } + } } let loc = self.srcloc(inst); @@ -1249,33 +1267,6 @@ impl<'func, I: VCodeInst> Lower<'func, I> { assert!(!self.inst_sunk.contains(&inst)); } - // If the value is a constant, then (re)materialize it at each - // use. This lowers register pressure. (Only do this if we are - // not using egraph-based compilation; the egraph framework - // more efficiently rematerializes constants where needed.) - if !self.flags.use_egraphs() { - if let Some(c) = self - .f - .dfg - .value_def(val) - .inst() - .and_then(|inst| self.get_constant(inst)) - { - let ty = self.f.dfg.value_type(val); - let regs = self.alloc_tmp(ty); - trace!(" -> regs {:?}", regs); - assert!(regs.is_valid()); - - let insts = I::gen_constant(regs, c.into(), ty, |ty| { - self.alloc_tmp(ty).only_reg().unwrap() - }); - for inst in insts { - self.emit(inst); - } - return non_writable_value_regs(regs); - } - } - let regs = self.value_regs[val]; trace!(" -> regs {:?}", regs); assert!(regs.is_valid()); @@ -1284,19 +1275,6 @@ impl<'func, I: VCodeInst> Lower<'func, I> { regs } - - /// Get the `idx`th output register(s) of the given IR instruction. - /// - /// When `backend.lower_inst_to_regs(ctx, inst)` is called, it is expected - /// that the backend will write results to these output register(s). This - /// register will always be "fresh"; it is guaranteed not to overlap with - /// any of the inputs, and can be freely used as a scratch register within - /// the lowered instruction sequence, as long as its final value is the - /// result of the computation. 
- pub fn get_output(&self, ir_inst: Inst, idx: usize) -> ValueRegs> { - let val = self.f.dfg.inst_results(ir_inst)[idx]; - writable_value_regs(self.value_regs[val]) - } } /// Codegen primitives: allocate temps, emit instructions, set result registers, diff --git a/cranelift/codegen/src/machinst/mod.rs b/cranelift/codegen/src/machinst/mod.rs index 37f617fd6a..83c0701e6b 100644 --- a/cranelift/codegen/src/machinst/mod.rs +++ b/cranelift/codegen/src/machinst/mod.rs @@ -112,14 +112,6 @@ pub trait MachInst: Clone + Debug { /// Generate a move. fn gen_move(to_reg: Writable, from_reg: Reg, ty: Type) -> Self; - /// Generate a constant into a reg. - fn gen_constant Writable>( - to_regs: ValueRegs>, - value: u128, - ty: Type, - alloc_tmp: F, - ) -> SmallVec<[Self; 4]>; - /// Generate a dummy instruction that will keep a value alive but /// has no other purpose. fn gen_dummy_use(reg: Reg) -> Self; diff --git a/cranelift/filetests/filetests/isa/aarch64/arithmetic.clif b/cranelift/filetests/filetests/isa/aarch64/arithmetic.clif index b8c3d77ce5..164a6b5ba4 100644 --- a/cranelift/filetests/filetests/isa/aarch64/arithmetic.clif +++ b/cranelift/filetests/filetests/isa/aarch64/arithmetic.clif @@ -97,7 +97,7 @@ block0(v0: i64): } ; block0: -; orr x2, xzr, #2 +; movz x2, #2 ; udiv x0, x0, x2 ; ret @@ -176,7 +176,7 @@ block0(v0: i32): ; block0: ; mov w2, w0 -; orr w4, wzr, #2 +; movz w4, #2 ; udiv x0, x2, x4 ; ret @@ -474,7 +474,7 @@ block0(v0: i64): } ; block0: -; orr x2, xzr, #2 +; movz x2, #2 ; udiv x4, x0, x2 ; msub x0, x4, x2, x0 ; ret diff --git a/cranelift/filetests/filetests/isa/aarch64/bitops.clif b/cranelift/filetests/filetests/isa/aarch64/bitops.clif index 6ea2115a4e..8c616221dc 100644 --- a/cranelift/filetests/filetests/isa/aarch64/bitops.clif +++ b/cranelift/filetests/filetests/isa/aarch64/bitops.clif @@ -111,7 +111,7 @@ block0(v0: i128): ; clz x5, x0 ; lsr x7, x3, #6 ; madd x0, x5, x7, x3 -; movz w1, #0 +; movz x1, #0 ; ret function %c(i8) -> i8 { @@ -173,7 +173,7 @@ block0(v0: i128): ; subs xzr, x5, #63 ; csel x14, x11, xzr, eq ; add x0, x14, x5 -; movz w1, #0 +; movz x1, #0 ; ret function %d(i8) -> i8 { @@ -235,7 +235,7 @@ block0(v0: i128): ; clz x9, x5 ; lsr x11, x7, #6 ; madd x0, x9, x11, x7 -; movz w1, #0 +; movz x1, #0 ; ret function %d(i128) -> i128 { @@ -250,7 +250,7 @@ block0(v0: i128): ; cnt v7.16b, v4.16b ; addv b17, v7.16b ; umov w0, v17.b[0] -; movz w1, #0 +; movz x1, #0 ; ret function %d(i64) -> i64 { @@ -312,7 +312,7 @@ block0: } ; block0: -; movn x0, #0 +; movz w0, #255 ; sxtb w0, w0 ; ret @@ -324,7 +324,7 @@ block0: } ; block0: -; movn x0, #0 +; movz w0, #255 ; sxtb w0, w0 ; ret diff --git a/cranelift/filetests/filetests/isa/aarch64/bti.clif b/cranelift/filetests/filetests/isa/aarch64/bti.clif index 98d5957db4..531593a1da 100644 --- a/cranelift/filetests/filetests/isa/aarch64/bti.clif +++ b/cranelift/filetests/filetests/isa/aarch64/bti.clif @@ -35,25 +35,25 @@ block5(v5: i32): ; subs wzr, w0, #3 ; b.hs label1 ; csel x15, xzr, x0, hs ; csdb ; adr x14, pc+16 ; ldrsw x15, [x14, x15, uxtw #2] ; add x14, x14, x15 ; br x14 ; jt_entries [Label(MachLabel(3)), Label(MachLabel(5)), Label(MachLabel(7))] ; block1: -; movz x5, #4 +; movz w5, #4 ; b label2 ; block2: ; b label9 ; block3: ; bti j -; movz x5, #1 +; movz w5, #1 ; b label4 ; block4: ; b label9 ; block5: ; bti j -; movz x5, #2 +; movz w5, #2 ; b label6 ; block6: ; b label9 ; block7: ; bti j -; movz x5, #3 +; movz w5, #3 ; b label8 ; block8: ; b label9 diff --git a/cranelift/filetests/filetests/isa/aarch64/call.clif 
b/cranelift/filetests/filetests/isa/aarch64/call.clif index 71c3592479..8b07951caa 100644 --- a/cranelift/filetests/filetests/isa/aarch64/call.clif +++ b/cranelift/filetests/filetests/isa/aarch64/call.clif @@ -82,14 +82,14 @@ block0(v0: i8): ; mov x8, x0 ; sub sp, sp, #16 ; virtual_sp_offset_adjust 16 -; movz x0, #42 -; movz x1, #42 -; movz x2, #42 -; movz x3, #42 -; movz x4, #42 -; movz x5, #42 -; movz x6, #42 -; movz x7, #42 +; movz w0, #42 +; movz w1, #42 +; movz w2, #42 +; movz w3, #42 +; movz w4, #42 +; movz w5, #42 +; movz w6, #42 +; movz w7, #42 ; strb w8, [sp] ; ldr x8, 8 ; b 12 ; data TestCase(%g) + 0 ; blr x8 @@ -107,14 +107,14 @@ block0(v0: i8): ; block0: ; mov x9, x0 ; mov x8, x1 -; movz x0, #42 -; movz x1, #42 -; movz x2, #42 -; movz x3, #42 -; movz x4, #42 -; movz x5, #42 -; movz x6, #42 -; movz x7, #42 +; movz w0, #42 +; movz w1, #42 +; movz w2, #42 +; movz w3, #42 +; movz w4, #42 +; movz w5, #42 +; movz w6, #42 +; movz w7, #42 ; strb w9, [x8] ; ret @@ -442,8 +442,8 @@ block0: ; block0: ; mov x6, x0 -; movz x0, #0 -; movz x4, #1 +; movz w0, #0 +; movz w4, #1 ; str w4, [x6] ; ret diff --git a/cranelift/filetests/filetests/isa/aarch64/condbr.clif b/cranelift/filetests/filetests/isa/aarch64/condbr.clif index 6aa63389a7..1975528e36 100644 --- a/cranelift/filetests/filetests/isa/aarch64/condbr.clif +++ b/cranelift/filetests/filetests/isa/aarch64/condbr.clif @@ -370,7 +370,7 @@ block1: ; subs xzr, x1, x3 ; cset x9, ls ; csel x11, x6, x9, eq -; orr x13, xzr, #1 +; movz x13, #1 ; subs xzr, x13, x11 ; b.ls label1 ; b label2 ; block1: @@ -472,7 +472,7 @@ block1: ; subs xzr, x1, x3 ; cset x9, hs ; csel x11, x6, x9, eq -; orr x13, xzr, #1 +; movz x13, #1 ; subs xzr, x11, x13 ; b.hs label1 ; b label2 ; block1: diff --git a/cranelift/filetests/filetests/isa/aarch64/condops.clif b/cranelift/filetests/filetests/isa/aarch64/condops.clif index 9f964e7f3f..5c878b484c 100644 --- a/cranelift/filetests/filetests/isa/aarch64/condops.clif +++ b/cranelift/filetests/filetests/isa/aarch64/condops.clif @@ -287,7 +287,7 @@ block0(v0: i128, v1: i8, v2: i8): ; block0: ; movz x6, #42 -; movz w8, #0 +; movz x8, #0 ; subs xzr, x0, x6 ; ccmp x1, x8, #nzcv, eq ; csel x0, x2, x3, eq @@ -304,7 +304,7 @@ block0(v0: i128, v1: i16, v2: i16): ; block0: ; movz x6, #42 -; movz w8, #0 +; movz x8, #0 ; subs xzr, x0, x6 ; ccmp x1, x8, #nzcv, eq ; csel x0, x2, x3, eq @@ -321,7 +321,7 @@ block0(v0: i128, v1: i32, v2: i32): ; block0: ; movz x6, #42 -; movz w8, #0 +; movz x8, #0 ; subs xzr, x0, x6 ; ccmp x1, x8, #nzcv, eq ; csel x0, x2, x3, eq @@ -338,7 +338,7 @@ block0(v0: i128, v1: i64, v2: i64): ; block0: ; movz x6, #42 -; movz w8, #0 +; movz x8, #0 ; subs xzr, x0, x6 ; ccmp x1, x8, #nzcv, eq ; csel x0, x2, x3, eq @@ -355,7 +355,7 @@ block0(v0: i128, v1: i128, v2: i128): ; block0: ; movz x9, #42 -; movz w11, #0 +; movz x11, #0 ; subs xzr, x0, x9 ; ccmp x1, x11, #nzcv, eq ; csel x0, x2, x4, eq @@ -667,7 +667,7 @@ block0(v0: i128, v1: i8, v2: i8): ; block0: ; movz x6, #42 -; movz w8, #0 +; movz x8, #0 ; subs xzr, x0, x6 ; ccmp x1, x8, #nzcv, eq ; csel x0, x2, x3, eq @@ -685,7 +685,7 @@ block0(v0: i128, v1: i16, v2: i16): ; block0: ; movz x6, #42 -; movz w8, #0 +; movz x8, #0 ; subs xzr, x0, x6 ; ccmp x1, x8, #nzcv, eq ; csel x0, x2, x3, eq @@ -703,7 +703,7 @@ block0(v0: i128, v1: i32, v2: i32): ; block0: ; movz x6, #42 -; movz w8, #0 +; movz x8, #0 ; subs xzr, x0, x6 ; ccmp x1, x8, #nzcv, eq ; csel x0, x2, x3, eq @@ -721,7 +721,7 @@ block0(v0: i128, v1: i64, v2: i64): ; block0: ; movz x6, #42 -; movz w8, #0 +; movz x8, #0 ; 
subs xzr, x0, x6 ; ccmp x1, x8, #nzcv, eq ; csel x0, x2, x3, eq @@ -739,7 +739,7 @@ block0(v0: i128, v1: i128, v2: i128): ; block0: ; movz x9, #42 -; movz w11, #0 +; movz x11, #0 ; subs xzr, x0, x9 ; ccmp x1, x11, #nzcv, eq ; csel x0, x2, x4, eq diff --git a/cranelift/filetests/filetests/isa/aarch64/constants.clif b/cranelift/filetests/filetests/isa/aarch64/constants.clif index 4059ac782e..8eaeb75d02 100644 --- a/cranelift/filetests/filetests/isa/aarch64/constants.clif +++ b/cranelift/filetests/filetests/isa/aarch64/constants.clif @@ -9,7 +9,7 @@ block0: } ; block0: -; movn x0, #0 +; movz w0, #255 ; ret function %f() -> i16 { @@ -19,7 +19,7 @@ block0: } ; block0: -; movz x0, #0 +; movz w0, #0 ; ret function %f() -> i64 { @@ -164,7 +164,7 @@ block0: } ; block0: -; movn x0, #0 +; movn w0, #0 ; ret function %f() -> i32 { diff --git a/cranelift/filetests/filetests/isa/aarch64/extend-op.clif b/cranelift/filetests/filetests/isa/aarch64/extend-op.clif index 2c1bf039d3..be28f5c9ad 100644 --- a/cranelift/filetests/filetests/isa/aarch64/extend-op.clif +++ b/cranelift/filetests/filetests/isa/aarch64/extend-op.clif @@ -33,7 +33,7 @@ block0(v0: i64): } ; block0: -; movz w1, #0 +; movz x1, #0 ; ret function %i128_sextend_i64(i64) -> i128 { @@ -54,7 +54,7 @@ block0(v0: i32): ; block0: ; mov w0, w0 -; movz w1, #0 +; movz x1, #0 ; ret function %i128_sextend_i32(i32) -> i128 { @@ -76,7 +76,7 @@ block0(v0: i16): ; block0: ; uxth w0, w0 -; movz w1, #0 +; movz x1, #0 ; ret function %i128_sextend_i16(i16) -> i128 { @@ -98,7 +98,7 @@ block0(v0: i8): ; block0: ; uxtb w0, w0 -; movz w1, #0 +; movz x1, #0 ; ret function %i128_sextend_i8(i8) -> i128 { @@ -154,7 +154,7 @@ block0(v0: i8x16): ; block0: ; umov w0, v0.b[1] -; movz w1, #0 +; movz x1, #0 ; ret function %i8x16_sextend_i16(i8x16) -> i16 { @@ -233,7 +233,7 @@ block0(v0: i16x8): ; block0: ; umov w0, v0.h[1] -; movz w1, #0 +; movz x1, #0 ; ret function %i16x8_sextend_i32(i16x8) -> i32 { @@ -290,7 +290,7 @@ block0(v0: i32x4): ; block0: ; mov w0, v0.s[1] -; movz w1, #0 +; movz x1, #0 ; ret function %i32x4_sextend_i64(i32x4) -> i64 { @@ -325,7 +325,7 @@ block0(v0: i64x2): ; block0: ; mov x0, v0.d[1] -; movz w1, #0 +; movz x1, #0 ; ret function %i64x2_sextend_i128(i64x2) -> i128 { diff --git a/cranelift/filetests/filetests/isa/aarch64/heap_addr.clif b/cranelift/filetests/filetests/isa/aarch64/heap_addr.clif index 5e48d91e46..5cfdd522a6 100644 --- a/cranelift/filetests/filetests/isa/aarch64/heap_addr.clif +++ b/cranelift/filetests/filetests/isa/aarch64/heap_addr.clif @@ -79,7 +79,7 @@ block0(v0: i64, v1: i32): ; mov w8, w1 ; add x9, x0, x1, UXTW ; add x9, x9, #16 -; movz w6, #65512 +; movz x6, #65512 ; movz x10, #0 ; subs xzr, x8, x6 ; csel x0, x10, x9, hi diff --git a/cranelift/filetests/filetests/isa/aarch64/iconst-icmp-small.clif b/cranelift/filetests/filetests/isa/aarch64/iconst-icmp-small.clif index 6e2890034d..badca4c653 100644 --- a/cranelift/filetests/filetests/isa/aarch64/iconst-icmp-small.clif +++ b/cranelift/filetests/filetests/isa/aarch64/iconst-icmp-small.clif @@ -14,9 +14,9 @@ block0: } ; block0: -; movz x0, #56780 +; movz w0, #56780 ; uxth w2, w0 -; movz x4, #56780 +; movz w4, #56780 ; subs wzr, w2, w4, UXTH ; cset x0, ne ; ret diff --git a/cranelift/filetests/filetests/isa/aarch64/jumptable.clif b/cranelift/filetests/filetests/isa/aarch64/jumptable.clif index e721c369aa..c4ab852848 100644 --- a/cranelift/filetests/filetests/isa/aarch64/jumptable.clif +++ b/cranelift/filetests/filetests/isa/aarch64/jumptable.clif @@ -34,22 +34,22 @@ block5(v5: i32): ; 
subs wzr, w0, #3 ; b.hs label1 ; csel x15, xzr, x0, hs ; csdb ; adr x14, pc+16 ; ldrsw x15, [x14, x15, uxtw #2] ; add x14, x14, x15 ; br x14 ; jt_entries [Label(MachLabel(3)), Label(MachLabel(5)), Label(MachLabel(7))] ; block1: -; movz x5, #4 +; movz w5, #4 ; b label2 ; block2: ; b label9 ; block3: -; movz x5, #1 +; movz w5, #1 ; b label4 ; block4: ; b label9 ; block5: -; movz x5, #2 +; movz w5, #2 ; b label6 ; block6: ; b label9 ; block7: -; movz x5, #3 +; movz w5, #3 ; b label8 ; block8: ; b label9 diff --git a/cranelift/filetests/filetests/isa/aarch64/shift-rotate.clif b/cranelift/filetests/filetests/isa/aarch64/shift-rotate.clif index 488a887ddd..afb88e2009 100644 --- a/cranelift/filetests/filetests/isa/aarch64/shift-rotate.clif +++ b/cranelift/filetests/filetests/isa/aarch64/shift-rotate.clif @@ -13,7 +13,7 @@ block0(v0: i128, v1: i128): } ; block0: -; orr x5, xzr, #128 +; movz x5, #128 ; sub x7, x5, x2 ; lsr x9, x0, x2 ; lsr x11, x1, x2 @@ -96,7 +96,7 @@ block0(v0: i128, v1: i128): } ; block0: -; orr x5, xzr, #128 +; movz x5, #128 ; sub x7, x5, x2 ; lsl x9, x0, x2 ; lsl x11, x1, x2 diff --git a/cranelift/filetests/filetests/isa/aarch64/simd-bitwise-compile.clif b/cranelift/filetests/filetests/isa/aarch64/simd-bitwise-compile.clif index 06344fc59e..d14f2a31d4 100644 --- a/cranelift/filetests/filetests/isa/aarch64/simd-bitwise-compile.clif +++ b/cranelift/filetests/filetests/isa/aarch64/simd-bitwise-compile.clif @@ -162,7 +162,7 @@ block0: ; block0: ; ldr q5, pc+8 ; b 20 ; data.f128 0x0f0e0d0c0b0a09080706050403020100 -; movz x1, #1 +; movz w1, #1 ; and w3, w1, #7 ; sub x5, xzr, x3 ; dup v7.16b, w5 @@ -191,7 +191,7 @@ block0(v0: i8x16, v1: i32): } ; block0: -; movz x3, #3 +; movz w3, #3 ; and w5, w3, #7 ; sub x7, xzr, x5 ; dup v17.16b, w7 diff --git a/cranelift/filetests/filetests/isa/aarch64/simd_load_zero.clif b/cranelift/filetests/filetests/isa/aarch64/simd_load_zero.clif index 3d4e953559..3ddef146b9 100644 --- a/cranelift/filetests/filetests/isa/aarch64/simd_load_zero.clif +++ b/cranelift/filetests/filetests/isa/aarch64/simd_load_zero.clif @@ -10,9 +10,9 @@ block0: } ; block0: -; movz x0, #1 -; movk x0, x0, #1, LSL #48 -; fmov d0, x0 +; movz x1, #1 +; movk x1, x1, #1, LSL #48 +; fmov d0, x1 ; ret function %f2() -> i32x4 { @@ -23,7 +23,7 @@ block0: } ; block0: -; movz x0, #42679 +; movz w0, #42679 ; fmov s0, w0 ; ret diff --git a/cranelift/filetests/filetests/isa/aarch64/uadd_overflow_trap.clif b/cranelift/filetests/filetests/isa/aarch64/uadd_overflow_trap.clif index e3bfc8a784..10130dd18d 100644 --- a/cranelift/filetests/filetests/isa/aarch64/uadd_overflow_trap.clif +++ b/cranelift/filetests/filetests/isa/aarch64/uadd_overflow_trap.clif @@ -9,7 +9,7 @@ block0(v0: i32): } ; block0: -; movz x2, #127 +; movz w2, #127 ; adds w0, w0, w2 ; b.lo 8 ; udf ; ret @@ -22,7 +22,7 @@ block0(v0: i32): } ; block0: -; movz x2, #127 +; movz w2, #127 ; adds w0, w2, w0 ; b.lo 8 ; udf ; ret diff --git a/cranelift/filetests/filetests/isa/x64/branches.clif b/cranelift/filetests/filetests/isa/x64/branches.clif index 3ec1d27d08..2e3f54249d 100644 --- a/cranelift/filetests/filetests/isa/x64/branches.clif +++ b/cranelift/filetests/filetests/isa/x64/branches.clif @@ -307,17 +307,17 @@ block202: ; pushq %rbp ; movq %rsp, %rbp ; block0: -; movl $1112539136, %r8d -; movd %r8d, %xmm5 -; ucomiss %xmm5, %xmm0 +; movl $1112539136, %edx +; movd %edx, %xmm6 +; ucomiss %xmm6, %xmm0 ; jp label2 ; jnz label2; j label1 ; block1: ; jmp label5 ; block2: -; movl $1112539136, %esi -; movd %esi, %xmm9 -; ucomiss %xmm9, %xmm0 +; 
movl $1112539136, %r11d
+; movd %r11d, %xmm10
+; ucomiss %xmm10, %xmm0
 ; jnp label3; j label4
 ; block3:
 ; ud2 heap_oob
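The aarch64 golden-output churn above follows from materializing small
constants with move-wide instructions: values with a single non-zero 16-bit
chunk become `movz` (replacing the old `orr ..., xzr, #imm` pattern),
inverted values become `movn`, and constants of 32-bit and narrower types
now use `w` rather than `x` registers. A rough sketch of the encodability
checks (an assumption for illustration, not the actual `MoveWideConst`
logic):

fn encodable_as_movz(v: u64) -> bool {
    // MOVZ encodes one 16-bit chunk at bit offset 0, 16, 32, or 48, with
    // every other bit zero; e.g. 2 becomes `movz x2, #2`.
    (0..4).any(|i| (v & !(0xFFFFu64 << (16 * i))) == 0)
}

fn encodable_as_movn(v: u64) -> bool {
    // MOVN materializes the bitwise NOT of a MOVZ-encodable value, e.g.
    // u64::MAX becomes `movn x0, #0` (the NOT of zero).
    encodable_as_movz(!v)
}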