diff --git a/Cargo.lock b/Cargo.lock index 6fb857c6cd..38b60d3d34 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2388,9 +2388,9 @@ dependencies = [ [[package]] name = "regalloc2" -version = "0.3.2" +version = "0.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d43a209257d978ef079f3d446331d0f1794f5e0fc19b306a199983857833a779" +checksum = "91ffba626f895ce5b8b97614bafa3fd59623490fe82f0fa8046dba6664a37b51" dependencies = [ "fxhash", "log", diff --git a/cranelift/codegen/Cargo.toml b/cranelift/codegen/Cargo.toml index 81e4d1294f..91cdcbd307 100644 --- a/cranelift/codegen/Cargo.toml +++ b/cranelift/codegen/Cargo.toml @@ -25,7 +25,7 @@ serde = { version = "1.0.94", features = ["derive"], optional = true } bincode = { version = "1.2.1", optional = true } gimli = { version = "0.26.0", default-features = false, features = ["write"], optional = true } smallvec = { version = "1.6.1" } -regalloc2 = { version = "0.3.2", features = ["checker"] } +regalloc2 = { version = "0.4.0", features = ["checker"] } souper-ir = { version = "2.1.0", optional = true } sha2 = { version = "0.9.0", optional = true } # It is a goal of the cranelift-codegen crate to have minimal external dependencies. diff --git a/cranelift/codegen/src/isa/aarch64/abi.rs b/cranelift/codegen/src/isa/aarch64/abi.rs index 2602d80953..6bd8f946c0 100644 --- a/cranelift/codegen/src/isa/aarch64/abi.rs +++ b/cranelift/codegen/src/isa/aarch64/abi.rs @@ -919,8 +919,8 @@ impl ABIMachineSpec for AArch64MachineDeps { fn gen_call( dest: &CallDest, - uses: SmallVec<[Reg; 8]>, - defs: SmallVec<[Writable; 8]>, + uses: CallArgList, + defs: CallRetList, clobbers: PRegSet, opcode: ir::Opcode, tmp: Writable, @@ -978,19 +978,32 @@ impl ABIMachineSpec for AArch64MachineDeps { call_conv: isa::CallConv, dst: Reg, src: Reg, + tmp: Writable, + _tmp2: Writable, size: usize, ) -> SmallVec<[Self::I; 8]> { let mut insts = SmallVec::new(); let arg0 = writable_xreg(0); let arg1 = writable_xreg(1); let arg2 = writable_xreg(2); - insts.push(Inst::gen_move(arg0, dst, I64)); - insts.push(Inst::gen_move(arg1, src, I64)); - insts.extend(Inst::load_constant(arg2, size as u64).into_iter()); + insts.extend(Inst::load_constant(tmp, size as u64).into_iter()); insts.push(Inst::Call { info: Box::new(CallInfo { dest: ExternalName::LibCall(LibCall::Memcpy), - uses: smallvec![arg0.to_reg(), arg1.to_reg(), arg2.to_reg()], + uses: smallvec![ + CallArgPair { + vreg: dst, + preg: arg0.to_reg() + }, + CallArgPair { + vreg: src, + preg: arg1.to_reg() + }, + CallArgPair { + vreg: tmp.to_reg(), + preg: arg2.to_reg() + } + ], defs: smallvec![], clobbers: Self::get_regs_clobbered_by_call(call_conv), opcode: Opcode::Call, diff --git a/cranelift/codegen/src/isa/aarch64/inst/mod.rs b/cranelift/codegen/src/isa/aarch64/inst/mod.rs index 34eb4f1aa4..744a581338 100644 --- a/cranelift/codegen/src/isa/aarch64/inst/mod.rs +++ b/cranelift/codegen/src/isa/aarch64/inst/mod.rs @@ -78,8 +78,8 @@ impl BitOp { #[derive(Clone, Debug)] pub struct CallInfo { pub dest: ExternalName, - pub uses: SmallVec<[Reg; 8]>, - pub defs: SmallVec<[Writable; 8]>, + pub uses: CallArgList, + pub defs: CallRetList, pub clobbers: PRegSet, pub opcode: Opcode, pub caller_callconv: CallConv, @@ -91,8 +91,8 @@ pub struct CallInfo { #[derive(Clone, Debug)] pub struct CallIndInfo { pub rn: Reg, - pub uses: SmallVec<[Reg; 8]>, - pub defs: SmallVec<[Writable; 8]>, + pub uses: SmallVec<[CallArgPair; 8]>, + pub defs: SmallVec<[CallRetPair; 8]>, pub clobbers: PRegSet, pub opcode: Opcode, pub caller_callconv: CallConv, @@ -1027,14 +1027,22 @@ fn aarch64_get_operands VReg>(inst: &Inst, collector: &mut Operan } &Inst::Jump { .. } => {} &Inst::Call { ref info, .. } => { - collector.reg_uses(&info.uses[..]); - collector.reg_defs(&info.defs[..]); + for u in &info.uses { + collector.reg_fixed_use(u.vreg, u.preg); + } + for d in &info.defs { + collector.reg_fixed_def(d.vreg, d.preg); + } collector.reg_clobbers(info.clobbers); } &Inst::CallInd { ref info, .. } => { collector.reg_use(info.rn); - collector.reg_uses(&info.uses[..]); - collector.reg_defs(&info.defs[..]); + for u in &info.uses { + collector.reg_fixed_use(u.vreg, u.preg); + } + for d in &info.defs { + collector.reg_fixed_def(d.vreg, d.preg); + } collector.reg_clobbers(info.clobbers); } &Inst::CondBr { ref kind, .. } => match kind { diff --git a/cranelift/codegen/src/isa/s390x/abi.rs b/cranelift/codegen/src/isa/s390x/abi.rs index 7a13a17f9d..c77ed2f905 100644 --- a/cranelift/codegen/src/isa/s390x/abi.rs +++ b/cranelift/codegen/src/isa/s390x/abi.rs @@ -743,8 +743,8 @@ impl ABIMachineSpec for S390xMachineDeps { fn gen_call( _dest: &CallDest, - _uses: SmallVec<[Reg; 8]>, - _defs: SmallVec<[Writable; 8]>, + _uses: CallArgList, + _defs: CallRetList, _clobbers: PRegSet, _opcode: ir::Opcode, _tmp: Writable, @@ -758,6 +758,8 @@ impl ABIMachineSpec for S390xMachineDeps { _call_conv: isa::CallConv, _dst: Reg, _src: Reg, + _tmp1: Writable, + _tmp2: Writable, _size: usize, ) -> SmallVec<[Self::I; 8]> { unimplemented!("StructArgs not implemented for S390X yet"); diff --git a/cranelift/codegen/src/isa/x64/abi.rs b/cranelift/codegen/src/isa/x64/abi.rs index efd235cdeb..7911be775d 100644 --- a/cranelift/codegen/src/isa/x64/abi.rs +++ b/cranelift/codegen/src/isa/x64/abi.rs @@ -429,7 +429,9 @@ impl ABIMachineSpec for X64ABIMachineSpec { insts.push(Inst::CallKnown { dest: ExternalName::LibCall(LibCall::Probestack), info: Box::new(CallInfo { - uses: smallvec![regs::rax()], + // No need to include arg here: we are post-regalloc + // so no constraints will be seen anyway. + uses: smallvec![], defs: smallvec![], clobbers: PRegSet::empty(), opcode: Opcode::Call, @@ -584,8 +586,8 @@ impl ABIMachineSpec for X64ABIMachineSpec { /// Generate a call instruction/sequence. fn gen_call( dest: &CallDest, - uses: SmallVec<[Reg; 8]>, - defs: SmallVec<[Writable; 8]>, + uses: CallArgList, + defs: CallRetList, clobbers: PRegSet, opcode: ir::Opcode, tmp: Writable, @@ -628,39 +630,47 @@ impl ABIMachineSpec for X64ABIMachineSpec { call_conv: isa::CallConv, dst: Reg, src: Reg, + temp: Writable, + temp2: Writable, size: usize, ) -> SmallVec<[Self::I; 8]> { let mut insts = SmallVec::new(); let arg0 = get_intreg_for_arg(&call_conv, 0, 0).unwrap(); let arg1 = get_intreg_for_arg(&call_conv, 1, 1).unwrap(); let arg2 = get_intreg_for_arg(&call_conv, 2, 2).unwrap(); - // We need a register to load the address of `memcpy()` below and we - // don't have a lowering context to allocate a temp here; so just use a - // register we know we are free to mutate as part of this sequence - // (because it is clobbered by the call as per the ABI anyway). - let memcpy_addr = get_intreg_for_arg(&call_conv, 3, 3).unwrap(); insts.push(Inst::gen_move(Writable::from_reg(arg0), dst, I64)); insts.push(Inst::gen_move(Writable::from_reg(arg1), src, I64)); insts.extend( - Inst::gen_constant( - ValueRegs::one(Writable::from_reg(arg2)), - size as u128, - I64, - |_| panic!("tmp should not be needed"), - ) + Inst::gen_constant(ValueRegs::one(temp), size as u128, I64, |_| { + panic!("tmp should not be needed") + }) .into_iter(), ); // We use an indirect call and a full LoadExtName because we do not have // information about the libcall `RelocDistance` here, so we // conservatively use the more flexible calling sequence. insts.push(Inst::LoadExtName { - dst: Writable::from_reg(memcpy_addr), + dst: temp2, name: Box::new(ExternalName::LibCall(LibCall::Memcpy)), offset: 0, }); insts.push(Inst::call_unknown( - RegMem::reg(memcpy_addr), - /* uses = */ smallvec![arg0, arg1, arg2], + RegMem::reg(temp2.to_reg()), + /* uses = */ + smallvec![ + CallArgPair { + vreg: dst, + preg: arg0 + }, + CallArgPair { + vreg: src, + preg: arg1 + }, + CallArgPair { + vreg: temp.to_reg(), + preg: arg2 + }, + ], /* defs = */ smallvec![], /* clobbers = */ Self::get_regs_clobbered_by_call(call_conv), Opcode::Call, diff --git a/cranelift/codegen/src/isa/x64/inst/mod.rs b/cranelift/codegen/src/isa/x64/inst/mod.rs index 9f67a7ef3d..ae92b7307e 100644 --- a/cranelift/codegen/src/isa/x64/inst/mod.rs +++ b/cranelift/codegen/src/isa/x64/inst/mod.rs @@ -1,7 +1,7 @@ //! This module defines x86_64-specific machine instruction types. use crate::binemit::{Addend, CodeOffset, Reloc, StackMap}; -use crate::ir::{types, ExternalName, Opcode, RelSourceLoc, TrapCode, Type}; +use crate::ir::{types, ExternalName, LibCall, Opcode, RelSourceLoc, TrapCode, Type}; use crate::isa::x64::abi::X64ABIMachineSpec; use crate::isa::x64::inst::regs::pretty_print_reg; use crate::isa::x64::settings as x64_settings; @@ -34,9 +34,9 @@ pub use super::lower::isle::generated_code::MInst as Inst; #[derive(Clone, Debug)] pub struct CallInfo { /// Register uses of this call. - pub uses: SmallVec<[Reg; 8]>, + pub uses: CallArgList, /// Register defs of this call. - pub defs: SmallVec<[Writable; 8]>, + pub defs: CallRetList, /// Registers clobbered by this call, as per its calling convention. pub clobbers: PRegSet, /// The opcode of this call. @@ -490,8 +490,8 @@ impl Inst { pub(crate) fn call_known( dest: ExternalName, - uses: SmallVec<[Reg; 8]>, - defs: SmallVec<[Writable; 8]>, + uses: CallArgList, + defs: CallRetList, clobbers: PRegSet, opcode: Opcode, ) -> Inst { @@ -508,8 +508,8 @@ impl Inst { pub(crate) fn call_unknown( dest: RegMem, - uses: SmallVec<[Reg; 8]>, - defs: SmallVec<[Writable; 8]>, + uses: CallArgList, + defs: CallRetList, clobbers: PRegSet, opcode: Opcode, ) -> Inst { @@ -1446,7 +1446,9 @@ impl PrettyPrint for Inst { format!("{} {}", ljustify("popq".to_string()), dst) } - Inst::CallKnown { dest, .. } => format!("{} {:?}", ljustify("call".to_string()), dest), + Inst::CallKnown { dest, .. } => { + format!("{} {:?}", ljustify("call".to_string()), dest) + } Inst::CallUnknown { dest, .. } => { let dest = dest.pretty_print(8, allocs); @@ -1981,23 +1983,28 @@ fn x64_get_operands VReg>(inst: &Inst, collector: &mut OperandCol collector.reg_early_def(*tmp); } - Inst::CallKnown { ref info, .. } => { - for &u in &info.uses { - collector.reg_use(u); + Inst::CallKnown { dest, ref info, .. } => { + // Probestack is special and is only inserted after + // regalloc, so we do not need to represent its ABI to the + // register allocator. Assert that we don't alter that + // arrangement. + debug_assert_ne!(*dest, ExternalName::LibCall(LibCall::Probestack)); + for u in &info.uses { + collector.reg_fixed_use(u.vreg, u.preg); } - for &d in &info.defs { - collector.reg_def(d); + for d in &info.defs { + collector.reg_fixed_def(d.vreg, d.preg); } collector.reg_clobbers(info.clobbers); } Inst::CallUnknown { ref info, dest, .. } => { dest.get_operands(collector); - for &u in &info.uses { - collector.reg_use(u); + for u in &info.uses { + collector.reg_fixed_use(u.vreg, u.preg); } - for &d in &info.defs { - collector.reg_def(d); + for d in &info.defs { + collector.reg_fixed_def(d.vreg, d.preg); } collector.reg_clobbers(info.clobbers); } diff --git a/cranelift/codegen/src/isa/x64/lower.rs b/cranelift/codegen/src/isa/x64/lower.rs index 1f151800b3..836b2c0056 100644 --- a/cranelift/codegen/src/isa/x64/lower.rs +++ b/cranelift/codegen/src/isa/x64/lower.rs @@ -8,11 +8,12 @@ use crate::isa::x64::abi::*; use crate::isa::x64::inst::args::*; use crate::isa::x64::inst::*; use crate::isa::{x64::settings as x64_settings, x64::X64Backend, CallConv}; +use crate::machinst::abi::SmallInstVec; use crate::machinst::lower::*; use crate::machinst::*; use crate::result::CodegenResult; use crate::settings::Flags; -use smallvec::SmallVec; +use smallvec::{smallvec, SmallVec}; use target_lexicon::Triple; //============================================================================= @@ -168,16 +169,18 @@ fn emit_vm_call( assert_eq!(inputs.len(), abi.num_args(ctx.sigs())); for (i, input) in inputs.iter().enumerate() { - for inst in abi.gen_copy_regs_to_arg(ctx, i, ValueRegs::one(*input)) { + for inst in abi.gen_arg(ctx, i, ValueRegs::one(*input)) { ctx.emit(inst); } } - abi.emit_call(ctx); + let mut retval_insts: SmallInstVec<_> = smallvec![]; for (i, output) in outputs.iter().enumerate() { - for inst in abi.gen_copy_retval_to_regs(ctx, i, ValueRegs::one(*output)) { - ctx.emit(inst); - } + retval_insts.extend(abi.gen_retval(ctx, i, ValueRegs::one(*output)).into_iter()); + } + abi.emit_call(ctx); + for inst in retval_insts { + ctx.emit(inst); } abi.emit_stack_post_adjust(ctx); diff --git a/cranelift/codegen/src/machinst/abi.rs b/cranelift/codegen/src/machinst/abi.rs index a7e7c3c204..bbff4abeca 100644 --- a/cranelift/codegen/src/machinst/abi.rs +++ b/cranelift/codegen/src/machinst/abi.rs @@ -506,8 +506,8 @@ pub trait ABIMachineSpec { /// temporary register to use to synthesize the called address, if needed. fn gen_call( dest: &CallDest, - uses: SmallVec<[Reg; 8]>, - defs: SmallVec<[Writable; 8]>, + uses: CallArgList, + defs: CallRetList, clobbers: PRegSet, opcode: ir::Opcode, tmp: Writable, @@ -515,13 +515,16 @@ pub trait ABIMachineSpec { callee_conv: isa::CallConv, ) -> SmallVec<[Self::I; 2]>; - /// Generate a memcpy invocation. Used to set up struct args. May clobber - /// caller-save registers; we only memcpy before we start to set up args for - /// a call. + /// Generate a memcpy invocation. Used to set up struct + /// args. Takes `src`, `dst` as read-only inputs and requires two + /// temporaries to generate the call (for the size immediate and + /// possibly for the address of `memcpy` itself). fn gen_memcpy( call_conv: isa::CallConv, dst: Reg, src: Reg, + tmp1: Writable, + tmp2: Writable, size: usize, ) -> SmallVec<[Self::I; 8]>; @@ -623,6 +626,9 @@ impl SigData { /// Return all uses (i.e, function args), defs (i.e., return values /// and caller-saved registers), and clobbers for the callsite. + /// + /// FIXME: used only by s390x; remove once that backend moves to + /// `call_clobbers` and constraint-based calls. pub fn call_uses_defs_clobbers( &self, ) -> (SmallVec<[Reg; 8]>, SmallVec<[Writable; 8]>, PRegSet) { @@ -682,6 +688,30 @@ impl SigData { (uses, defs, clobbers) } + /// Return all clobbers for the callsite. + pub fn call_clobbers(&self) -> PRegSet { + // Get clobbers: all caller-saves. These may include return value + // regs, which we will remove from the clobber set below. + let mut clobbers = M::get_regs_clobbered_by_call(self.call_conv); + + // Remove retval regs from clobbers. + for ret in &self.rets { + if let &ABIArg::Slots { ref slots, .. } = ret { + for slot in slots { + match slot { + &ABIArgSlot::Reg { reg, .. } => { + log::trace!("call_clobbers: retval reg {:?}", reg); + clobbers.remove(PReg::from(reg)); + } + _ => {} + } + } + } + } + + clobbers + } + /// Get the number of arguments expected. pub fn num_args(&self) -> usize { if self.stack_ret_arg.is_some() { @@ -1848,14 +1878,38 @@ impl Callee { } } +/// An input argument to a call instruction: the vreg that is used, +/// and the preg it is constrained to (per the ABI). +#[derive(Clone, Debug)] +pub struct CallArgPair { + /// The virtual register to use for the argument. + pub vreg: Reg, + /// The real register into which the arg goes. + pub preg: Reg, +} + +/// An output return value from a call instruction: the vreg that is +/// defined, and the preg it is constrained to (per the ABI). +#[derive(Clone, Debug)] +pub struct CallRetPair { + /// The virtual register to define from this return value. + pub vreg: Writable, + /// The real register from which the return value is read. + pub preg: Reg, +} + +pub type CallArgList = SmallVec<[CallArgPair; 8]>; +pub type CallRetList = SmallVec<[CallRetPair; 8]>; + /// ABI object for a callsite. pub struct Caller { /// The called function's signature. sig: Sig, - /// All uses for the callsite, i.e., function args. - uses: SmallVec<[Reg; 8]>, + /// All register uses for the callsite, i.e., function args, with + /// VReg and the physical register it is constrained to. + uses: CallArgList, /// All defs for the callsite, i.e., return values. - defs: SmallVec<[Writable; 8]>, + defs: CallRetList, /// Caller-save clobbers. clobbers: PRegSet, /// Call destination. @@ -1890,11 +1944,11 @@ impl Caller { flags: settings::Flags, ) -> CodegenResult> { let sig = sigs.abi_sig_for_sig_ref(sig_ref); - let (uses, defs, clobbers) = sigs[sig].call_uses_defs_clobbers::(); + let clobbers = sigs[sig].call_clobbers::(); Ok(Caller { sig, - uses, - defs, + uses: smallvec![], + defs: smallvec![], clobbers, dest: CallDest::ExtName(extname.clone(), dist), opcode: ir::Opcode::Call, @@ -1915,11 +1969,11 @@ impl Caller { flags: settings::Flags, ) -> CodegenResult> { let sig = sigs.abi_sig_for_signature(sig); - let (uses, defs, clobbers) = sigs[sig].call_uses_defs_clobbers::(); + let clobbers = sigs[sig].call_clobbers::(); Ok(Caller { sig, - uses, - defs, + uses: smallvec![], + defs: smallvec![], clobbers, dest: CallDest::ExtName(extname.clone(), dist), opcode: ir::Opcode::Call, @@ -1940,11 +1994,11 @@ impl Caller { flags: settings::Flags, ) -> CodegenResult> { let sig = sigs.abi_sig_for_sig_ref(sig_ref); - let (uses, defs, clobbers) = sigs[sig].call_uses_defs_clobbers::(); + let clobbers = sigs[sig].call_clobbers::(); Ok(Caller { sig, - uses, - defs, + uses: smallvec![], + defs: smallvec![], clobbers, dest: CallDest::Reg(ptr), opcode, @@ -2018,9 +2072,17 @@ impl Caller { // arg regs. let memcpy_call_conv = isa::CallConv::for_libcall(&self.flags, ctx.sigs()[self.sig].call_conv); - for insn in - M::gen_memcpy(memcpy_call_conv, dst_ptr.to_reg(), src_ptr, size as usize) - .into_iter() + let tmp1 = ctx.alloc_tmp(M::word_type()).only_reg().unwrap(); + let tmp2 = ctx.alloc_tmp(M::word_type()).only_reg().unwrap(); + for insn in M::gen_memcpy( + memcpy_call_conv, + dst_ptr.to_reg(), + src_ptr, + tmp1, + tmp2, + size as usize, + ) + .into_iter() { ctx.emit(insn); } @@ -2029,19 +2091,48 @@ impl Caller { } } - /// Generate a copy of an argument value from a source register, prior to - /// the call. For large arguments with associated stack buffer, this may - /// load the address of the buffer into the argument register, if required - /// by the ABI. - pub fn gen_copy_regs_to_arg( - &self, - ctx: &Lower, + /// Add a constraint for an argument value from a source register. + /// For large arguments with associated stack buffer, this may + /// load the address of the buffer into the argument register, if + /// required by the ABI. + pub fn gen_arg( + &mut self, + ctx: &mut Lower, idx: usize, from_regs: ValueRegs, ) -> SmallInstVec { let mut insts = smallvec![]; let word_rc = M::word_reg_class(); let word_bits = M::word_bits() as usize; + + // How many temps do we need for extends? Allocate them ahead + // of time, since we can't do it while we're iterating over + // the sig and immutably borrowing `ctx`. + let needed_tmps = match &ctx.sigs()[self.sig].args[idx] { + &ABIArg::Slots { ref slots, .. } => slots + .iter() + .map(|slot| match slot { + &ABIArgSlot::Reg { extension, .. } + if extension != ir::ArgumentExtension::None => + { + 1 + } + &ABIArgSlot::Reg { ty, .. } if ty.is_ref() => 1, + &ABIArgSlot::Reg { .. } => 0, + &ABIArgSlot::Stack { extension, .. } + if extension != ir::ArgumentExtension::None => + { + 1 + } + &ABIArgSlot::Stack { .. } => 0, + }) + .sum(), + _ => 0, + }; + let mut temps: SmallVec<[Writable; 16]> = (0..needed_tmps) + .map(|_| ctx.alloc_tmp(M::word_type()).only_reg().unwrap()) + .collect(); + match &ctx.sigs()[self.sig].args[idx] { &ABIArg::Slots { ref slots, .. } => { assert_eq!(from_regs.len(), slots.len()); @@ -2058,19 +2149,36 @@ impl Caller { ir::ArgumentExtension::Sext => true, _ => unreachable!(), }; + let extend_result = + temps.pop().expect("Must have allocated enough temps"); insts.push(M::gen_extend( - Writable::from_reg(Reg::from(reg)), + extend_result, *from_reg, signed, ty_bits(ty) as u8, word_bits as u8, )); + self.uses.push(CallArgPair { + vreg: extend_result.to_reg(), + preg: reg.into(), + }); + } else if ty.is_ref() { + // Reference-typed args need to be + // passed as a copy; the original vreg + // is constrained to the stack and + // this copy is in a reg. + let ref_copy = + temps.pop().expect("Must have allocated enough temps"); + insts.push(M::gen_move(ref_copy, *from_reg, M::word_type())); + self.uses.push(CallArgPair { + vreg: ref_copy.to_reg(), + preg: reg.into(), + }); } else { - insts.push(M::gen_move( - Writable::from_reg(Reg::from(reg)), - *from_reg, - ty, - )); + self.uses.push(CallArgPair { + vreg: *from_reg, + preg: reg.into(), + }); } } &ABIArgSlot::Stack { @@ -2079,31 +2187,32 @@ impl Caller { extension, .. } => { - let mut ty = ty; let ext = M::get_ext_mode(ctx.sigs()[self.sig].call_conv, extension); - if ext != ir::ArgumentExtension::None && ty_bits(ty) < word_bits { - assert_eq!(word_rc, from_reg.class()); - let signed = match ext { - ir::ArgumentExtension::Uext => false, - ir::ArgumentExtension::Sext => true, - _ => unreachable!(), + let (data, ty) = + if ext != ir::ArgumentExtension::None && ty_bits(ty) < word_bits { + assert_eq!(word_rc, from_reg.class()); + let signed = match ext { + ir::ArgumentExtension::Uext => false, + ir::ArgumentExtension::Sext => true, + _ => unreachable!(), + }; + let extend_result = + temps.pop().expect("Must have allocated enough temps"); + insts.push(M::gen_extend( + extend_result, + *from_reg, + signed, + ty_bits(ty) as u8, + word_bits as u8, + )); + // Store the extended version. + (extend_result.to_reg(), M::word_type()) + } else { + (*from_reg, ty) }; - // Extend in place in the source register. Our convention is to - // treat high bits as undefined for values in registers, so this - // is safe, even for an argument that is nominally read-only. - insts.push(M::gen_extend( - Writable::from_reg(*from_reg), - *from_reg, - signed, - ty_bits(ty) as u8, - word_bits as u8, - )); - // Store the extended version. - ty = M::word_type(); - } insts.push(M::gen_store_stack( StackAMode::SPOffset(offset, ty), - *from_reg, + data, ty, )); } @@ -2118,9 +2227,9 @@ impl Caller { insts } - /// Emit a copy a return value into a destination register, after the call returns. - pub fn gen_copy_retval_to_regs( - &self, + /// Define a return value after the call returns. + pub fn gen_retval( + &mut self, ctx: &Lower, idx: usize, into_regs: ValueRegs>, @@ -2133,8 +2242,11 @@ impl Caller { match slot { // Extension mode doesn't matter because we're copying out, not in, // and we ignore high bits in our own registers by convention. - &ABIArgSlot::Reg { reg, ty, .. } => { - insts.push(M::gen_move(*into_reg, Reg::from(reg), ty)); + &ABIArgSlot::Reg { reg, .. } => { + self.defs.push(CallRetPair { + vreg: *into_reg, + preg: reg.into(), + }); } &ABIArgSlot::Stack { offset, ty, .. } => { let ret_area_base = ctx.sigs()[self.sig].sized_stack_arg_space; @@ -2171,10 +2283,6 @@ impl Caller { /// This function should only be called once, as it is allowed to re-use /// parts of the `Caller` object in emitting instructions. pub fn emit_call(&mut self, ctx: &mut Lower) { - let (uses, defs) = ( - mem::replace(&mut self.uses, Default::default()), - mem::replace(&mut self.defs, Default::default()), - ); let word_type = M::word_type(); if let Some(i) = ctx.sigs()[self.sig].stack_ret_arg { let rd = ctx.alloc_tmp(word_type).only_reg().unwrap(); @@ -2184,10 +2292,16 @@ impl Caller { rd, I8, )); - for inst in self.gen_copy_regs_to_arg(ctx, i, ValueRegs::one(rd.to_reg())) { + for inst in self.gen_arg(ctx, i, ValueRegs::one(rd.to_reg())) { ctx.emit(inst); } } + + let (uses, defs) = ( + mem::replace(&mut self.uses, Default::default()), + mem::replace(&mut self.defs, Default::default()), + ); + let tmp = ctx.alloc_tmp(word_type).only_reg().unwrap(); for inst in M::gen_call( &self.dest, diff --git a/cranelift/codegen/src/machinst/isle.rs b/cranelift/codegen/src/machinst/isle.rs index feff699d4e..d15340d66d 100644 --- a/cranelift/codegen/src/machinst/isle.rs +++ b/cranelift/codegen/src/machinst/isle.rs @@ -1208,21 +1208,32 @@ macro_rules! isle_prelude_method_helpers { caller.emit_copy_regs_to_buffer(self.lower_ctx, i, *arg_regs); } for (i, arg_regs) in arg_regs.iter().enumerate() { - for inst in caller.gen_copy_regs_to_arg(self.lower_ctx, i, *arg_regs) { + for inst in caller.gen_arg(self.lower_ctx, i, *arg_regs) { self.lower_ctx.emit(inst); } } - caller.emit_call(self.lower_ctx); - + // Handle retvals prior to emitting call, so the + // constraints are on the call instruction; but buffer the + // instructions till after the call. let mut outputs = InstOutput::new(); + let mut retval_insts: crate::machinst::abi::SmallInstVec<_> = smallvec::smallvec![]; for i in 0..num_rets { let ret = self.lower_ctx.sigs()[abi].get_ret(i); let retval_regs = self.abi_arg_slot_regs(&ret).unwrap(); - for inst in caller.gen_copy_retval_to_regs(self.lower_ctx, i, retval_regs.clone()) { - self.lower_ctx.emit(inst); - } + retval_insts.extend( + caller + .gen_retval(self.lower_ctx, i, retval_regs.clone()) + .into_iter(), + ); outputs.push(valueregs::non_writable_value_regs(retval_regs)); } + + caller.emit_call(self.lower_ctx); + + for inst in retval_insts { + self.lower_ctx.emit(inst); + } + caller.emit_stack_post_adjust(self.lower_ctx); outputs diff --git a/cranelift/codegen/src/machinst/vcode.rs b/cranelift/codegen/src/machinst/vcode.rs index 1af40ccf6d..7f36389cd9 100644 --- a/cranelift/codegen/src/machinst/vcode.rs +++ b/cranelift/codegen/src/machinst/vcode.rs @@ -1027,7 +1027,6 @@ impl VCode { // Spill from register to spillslot. let to = to.as_stack().unwrap(); let from_rreg = RealReg::from(from); - debug_assert_eq!(from.class(), to.class()); let spill = self.abi.gen_spill(to, from_rreg); do_emit(&spill, &[], &mut disasm, &mut buffer, &mut state); } @@ -1035,7 +1034,6 @@ impl VCode { // Load from spillslot to register. let from = from.as_stack().unwrap(); let to_rreg = Writable::from_reg(RealReg::from(to)); - debug_assert_eq!(from.class(), to.class()); let reload = self.abi.gen_reload(to_rreg, from); do_emit(&reload, &[], &mut disasm, &mut buffer, &mut state); } diff --git a/cranelift/filetests/filetests/isa/aarch64/amodes.clif b/cranelift/filetests/filetests/isa/aarch64/amodes.clif index 8347f9eb0c..c28ec2eb49 100644 --- a/cranelift/filetests/filetests/isa/aarch64/amodes.clif +++ b/cranelift/filetests/filetests/isa/aarch64/amodes.clif @@ -69,9 +69,10 @@ block0(v0: i64, v1: i64, v2: i64): } ; block0: -; add x0, x0, x2 -; add x0, x0, x1 -; ldr w0, [x0, #48] +; mov x6, x0 +; add x6, x6, x2 +; add x6, x6, x1 +; ldr w0, [x6, #48] ; ret function %f10(i64, i64, i64) -> i32 { @@ -232,11 +233,11 @@ block0(v0: i64): } ; block0: -; mov x6, x0 -; ldp x7, x1, [x6] -; mov x11, x7 -; stp x11, x1, [x0] -; mov x0, x7 +; mov x8, x0 +; mov x6, x8 +; ldp x0, x1, [x6] +; mov x7, x8 +; stp x0, x1, [x7] ; ret function %i128_imm_offset(i64) -> i128 { @@ -247,11 +248,11 @@ block0(v0: i64): } ; block0: -; mov x6, x0 -; ldp x7, x1, [x6, #16] -; mov x11, x7 -; stp x11, x1, [x0, #16] -; mov x0, x7 +; mov x8, x0 +; mov x6, x8 +; ldp x0, x1, [x6, #16] +; mov x7, x8 +; stp x0, x1, [x7, #16] ; ret function %i128_imm_offset_large(i64) -> i128 { @@ -262,11 +263,11 @@ block0(v0: i64): } ; block0: -; mov x6, x0 -; ldp x7, x1, [x6, #504] -; mov x11, x7 -; stp x11, x1, [x0, #504] -; mov x0, x7 +; mov x8, x0 +; mov x6, x8 +; ldp x0, x1, [x6, #504] +; mov x7, x8 +; stp x0, x1, [x7, #504] ; ret function %i128_imm_offset_negative_large(i64) -> i128 { @@ -277,11 +278,11 @@ block0(v0: i64): } ; block0: -; mov x6, x0 -; ldp x7, x1, [x6, #-512] -; mov x11, x7 -; stp x11, x1, [x0, #-512] -; mov x0, x7 +; mov x8, x0 +; mov x6, x8 +; ldp x0, x1, [x6, #-512] +; mov x7, x8 +; stp x0, x1, [x7, #-512] ; ret function %i128_add_offset(i64) -> i128 { @@ -293,11 +294,11 @@ block0(v0: i64): } ; block0: -; mov x6, x0 -; ldp x7, x1, [x6, #32] -; mov x11, x7 -; stp x11, x1, [x0, #32] -; mov x0, x7 +; mov x8, x0 +; mov x6, x8 +; ldp x0, x1, [x6, #32] +; mov x7, x8 +; stp x0, x1, [x7, #32] ; ret function %i128_32bit_sextend_simple(i32) -> i128 { @@ -327,13 +328,13 @@ block0(v0: i64, v1: i32): } ; block0: -; mov x7, x0 +; mov x11, x0 +; mov x7, x11 ; add x7, x7, x1, SXTW -; ldp x9, x10, [x7, #24] -; add x0, x0, x1, SXTW -; mov x14, x9 +; ldp x0, x10, [x7, #24] +; mov x9, x11 +; add x9, x9, x1, SXTW ; mov x1, x10 -; stp x14, x1, [x0, #24] -; mov x0, x9 +; stp x0, x1, [x9, #24] ; ret diff --git a/cranelift/filetests/filetests/isa/aarch64/bti.clif b/cranelift/filetests/filetests/isa/aarch64/bti.clif index 4e7ea3075f..157a767fa3 100644 --- a/cranelift/filetests/filetests/isa/aarch64/bti.clif +++ b/cranelift/filetests/filetests/isa/aarch64/bti.clif @@ -109,3 +109,4 @@ block0(v0: i64): ; blr x4 ; ldp fp, lr, [sp], #16 ; ret + diff --git a/cranelift/filetests/filetests/isa/aarch64/call.clif b/cranelift/filetests/filetests/isa/aarch64/call.clif index 7aec05dd8e..64ea276ae3 100644 --- a/cranelift/filetests/filetests/isa/aarch64/call.clif +++ b/cranelift/filetests/filetests/isa/aarch64/call.clif @@ -79,7 +79,7 @@ block0(v0: i8): ; stp fp, lr, [sp, #-16]! ; mov fp, sp ; block0: -; mov x15, x0 +; mov x8, x0 ; sub sp, sp, #16 ; virtual_sp_offset_adjust 16 ; movz x0, #42 @@ -90,9 +90,9 @@ block0(v0: i8): ; movz x5, #42 ; movz x6, #42 ; movz x7, #42 -; strb w15, [sp] -; ldr x14, 8 ; b 12 ; data TestCase(%g) + 0 -; blr x14 +; strb w8, [sp] +; ldr x8, 8 ; b 12 ; data TestCase(%g) + 0 +; blr x8 ; add sp, sp, #16 ; virtual_sp_offset_adjust -16 ; ldp fp, lr, [sp], #16 @@ -105,7 +105,7 @@ block0(v0: i8): } ; block0: -; mov x15, x0 +; mov x8, x0 ; mov x13, x1 ; movz x0, #42 ; movz x1, #42 @@ -115,7 +115,8 @@ block0(v0: i8): ; movz x5, #42 ; movz x6, #42 ; movz x7, #42 -; strb w15, [x13] +; mov x11, x8 +; strb w11, [x13] ; ret function %f8() { @@ -140,26 +141,26 @@ block0: ; mov fp, sp ; sub sp, sp, #48 ; block0: -; ldr x8, 8 ; b 12 ; data TestCase(%g0) + 0 -; blr x8 +; ldr x9, 8 ; b 12 ; data TestCase(%g0) + 0 +; blr x9 ; str q0, [sp, #32] ; ldr x9, 8 ; b 12 ; data TestCase(%g1) + 0 ; blr x9 ; str q0, [sp, #16] -; ldr x10, 8 ; b 12 ; data TestCase(%g1) + 0 -; blr x10 +; ldr x9, 8 ; b 12 ; data TestCase(%g1) + 0 +; blr x9 ; str q0, [sp] -; ldr x12, 8 ; b 12 ; data TestCase(%g2) + 0 -; blr x12 +; ldr x9, 8 ; b 12 ; data TestCase(%g2) + 0 +; blr x9 +; ldr x10, 8 ; b 12 ; data TestCase(%g3) + 0 ; ldr q0, [sp, #32] -; ldr x14, 8 ; b 12 ; data TestCase(%g3) + 0 -; blr x14 +; blr x10 +; ldr x11, 8 ; b 12 ; data TestCase(%g4) + 0 ; ldr q0, [sp, #16] -; ldr x0, 8 ; b 12 ; data TestCase(%g4) + 0 -; blr x0 +; blr x11 +; ldr x12, 8 ; b 12 ; data TestCase(%g4) + 0 ; ldr q0, [sp] -; ldr x2, 8 ; b 12 ; data TestCase(%g4) + 0 -; blr x2 +; blr x12 ; add sp, sp, #48 ; ldp fp, lr, [sp], #16 ; ret @@ -184,26 +185,26 @@ block0: ; mov fp, sp ; sub sp, sp, #48 ; block0: -; ldr x8, 8 ; b 12 ; data TestCase(%g0) + 0 -; blr x8 +; ldr x9, 8 ; b 12 ; data TestCase(%g0) + 0 +; blr x9 ; str q0, [sp, #32] ; ldr x9, 8 ; b 12 ; data TestCase(%g0) + 0 ; blr x9 ; str q0, [sp, #16] -; ldr x10, 8 ; b 12 ; data TestCase(%g0) + 0 -; blr x10 +; ldr x9, 8 ; b 12 ; data TestCase(%g0) + 0 +; blr x9 ; str q0, [sp] -; ldr x12, 8 ; b 12 ; data TestCase(%g1) + 0 -; blr x12 +; ldr x9, 8 ; b 12 ; data TestCase(%g1) + 0 +; blr x9 +; ldr x10, 8 ; b 12 ; data TestCase(%g2) + 0 ; ldr q0, [sp, #32] -; ldr x14, 8 ; b 12 ; data TestCase(%g2) + 0 -; blr x14 +; blr x10 +; ldr x11, 8 ; b 12 ; data TestCase(%g2) + 0 ; ldr q0, [sp, #16] -; ldr x0, 8 ; b 12 ; data TestCase(%g2) + 0 -; blr x0 +; blr x11 +; ldr x12, 8 ; b 12 ; data TestCase(%g2) + 0 ; ldr q0, [sp] -; ldr x2, 8 ; b 12 ; data TestCase(%g2) + 0 -; blr x2 +; blr x12 ; add sp, sp, #48 ; ldp fp, lr, [sp], #16 ; ret @@ -232,26 +233,26 @@ block0: ; mov fp, sp ; sub sp, sp, #48 ; block0: -; ldr x8, 8 ; b 12 ; data TestCase(%g0) + 0 -; blr x8 +; ldr x9, 8 ; b 12 ; data TestCase(%g0) + 0 +; blr x9 ; str q0, [sp, #32] ; ldr x9, 8 ; b 12 ; data TestCase(%g1) + 0 ; blr x9 ; str q0, [sp, #16] -; ldr x10, 8 ; b 12 ; data TestCase(%g2) + 0 -; blr x10 +; ldr x9, 8 ; b 12 ; data TestCase(%g2) + 0 +; blr x9 ; str q0, [sp] -; ldr x12, 8 ; b 12 ; data TestCase(%g3) + 0 -; blr x12 +; ldr x9, 8 ; b 12 ; data TestCase(%g3) + 0 +; blr x9 +; ldr x10, 8 ; b 12 ; data TestCase(%g4) + 0 ; ldr q0, [sp, #32] -; ldr x14, 8 ; b 12 ; data TestCase(%g4) + 0 -; blr x14 +; blr x10 +; ldr x11, 8 ; b 12 ; data TestCase(%g5) + 0 ; ldr q0, [sp, #16] -; ldr x0, 8 ; b 12 ; data TestCase(%g5) + 0 -; blr x0 +; blr x11 +; ldr x12, 8 ; b 12 ; data TestCase(%g6) + 0 ; ldr q0, [sp] -; ldr x2, 8 ; b 12 ; data TestCase(%g6) + 0 -; blr x2 +; blr x12 ; add sp, sp, #48 ; ldp fp, lr, [sp], #16 ; ret @@ -279,12 +280,11 @@ block0(v0: i64): ; stp fp, lr, [sp, #-16]! ; mov fp, sp ; block0: -; mov x7, x0 +; mov x1, x0 ; movz x0, #42 ; movz x2, #42 -; mov x1, x7 -; ldr x9, 8 ; b 12 ; data TestCase(%f11) + 0 -; blr x9 +; ldr x7, 8 ; b 12 ; data TestCase(%f11) + 0 +; blr x7 ; ldp fp, lr, [sp], #16 ; ret @@ -311,12 +311,11 @@ block0(v0: i64): ; stp fp, lr, [sp, #-16]! ; mov fp, sp ; block0: -; mov x7, x0 +; mov x2, x0 ; movz x3, #42 ; movz x0, #42 -; mov x2, x7 -; ldr x9, 8 ; b 12 ; data TestCase(%f12) + 0 -; blr x9 +; ldr x7, 8 ; b 12 ; data TestCase(%f12) + 0 +; blr x7 ; ldp fp, lr, [sp], #16 ; ret @@ -343,12 +342,11 @@ block0(v0: i64): ; stp fp, lr, [sp, #-16]! ; mov fp, sp ; block0: -; mov x7, x0 +; mov x1, x0 ; movz x2, #42 ; movz x0, #42 -; mov x1, x7 -; ldr x9, 8 ; b 12 ; data TestCase(%f13) + 0 -; blr x9 +; ldr x7, 8 ; b 12 ; data TestCase(%f13) + 0 +; blr x7 ; ldp fp, lr, [sp], #16 ; ret @@ -376,20 +374,19 @@ block0(v0: i128, v1: i64): ; stp fp, lr, [sp, #-16]! ; mov fp, sp ; block0: -; mov x11, x2 +; mov x6, x2 ; sub sp, sp, #16 ; virtual_sp_offset_adjust 16 -; mov x10, x0 -; mov x12, x1 -; mov x2, x10 -; mov x3, x12 -; mov x4, x10 -; mov x5, x12 -; mov x6, x11 -; str x10, [sp] -; str x12, [sp, #8] -; ldr x7, 8 ; b 12 ; data TestCase(%f14) + 0 -; blr x7 +; str x0, [sp] +; mov x4, x0 +; str x1, [sp, #8] +; mov x5, x1 +; ldr x12, 8 ; b 12 ; data TestCase(%f14) + 0 +; mov x0, x4 +; mov x2, x4 +; mov x1, x5 +; mov x3, x5 +; blr x12 ; add sp, sp, #16 ; virtual_sp_offset_adjust -16 ; ldp fp, lr, [sp], #16 @@ -419,20 +416,19 @@ block0(v0: i128, v1: i64): ; stp fp, lr, [sp, #-16]! ; mov fp, sp ; block0: -; mov x11, x2 +; mov x6, x2 ; sub sp, sp, #16 ; virtual_sp_offset_adjust 16 -; mov x10, x0 -; mov x12, x1 -; mov x2, x10 -; mov x3, x12 -; mov x4, x10 -; mov x5, x12 -; mov x6, x11 -; str x10, [sp] -; str x12, [sp, #8] -; ldr x7, 8 ; b 12 ; data TestCase(%f15) + 0 -; blr x7 +; str x0, [sp] +; mov x4, x0 +; str x1, [sp, #8] +; mov x5, x1 +; ldr x12, 8 ; b 12 ; data TestCase(%f15) + 0 +; mov x0, x4 +; mov x2, x4 +; mov x1, x5 +; mov x3, x5 +; blr x12 ; add sp, sp, #16 ; virtual_sp_offset_adjust -16 ; ldp fp, lr, [sp], #16 @@ -496,8 +492,8 @@ block0(v0: i64): ; str x24, [sp, #-16]! ; block0: ; mov x24, x8 -; ldr x5, 8 ; b 12 ; data TestCase(%g) + 0 -; blr x5 +; ldr x4, 8 ; b 12 ; data TestCase(%g) + 0 +; blr x4 ; mov x8, x24 ; ldr x24, [sp], #16 ; ldp fp, lr, [sp], #16 diff --git a/cranelift/filetests/filetests/isa/aarch64/floating-point.clif b/cranelift/filetests/filetests/isa/aarch64/floating-point.clif index 2ffd58c16e..c1e6b228cc 100644 --- a/cranelift/filetests/filetests/isa/aarch64/floating-point.clif +++ b/cranelift/filetests/filetests/isa/aarch64/floating-point.clif @@ -918,8 +918,9 @@ block0(v0: f32x4, v1: f32x4, v2: f32x4): } ; block0: -; fmla v2.4s, v2.4s, v0.4s, v1.4s -; mov v0.16b, v2.16b +; mov v5.16b, v2.16b +; fmla v5.4s, v5.4s, v0.4s, v1.4s +; mov v0.16b, v5.16b ; ret function %f79(f32x2, f32x2, f32x2) -> f32x2 { @@ -929,8 +930,9 @@ block0(v0: f32x2, v1: f32x2, v2: f32x2): } ; block0: -; fmla v2.2s, v2.2s, v0.2s, v1.2s -; mov v0.16b, v2.16b +; mov v5.16b, v2.16b +; fmla v5.2s, v5.2s, v0.2s, v1.2s +; mov v0.16b, v5.16b ; ret function %f80(f64x2, f64x2, f64x2) -> f64x2 { @@ -940,8 +942,9 @@ block0(v0: f64x2, v1: f64x2, v2: f64x2): } ; block0: -; fmla v2.2d, v2.2d, v0.2d, v1.2d -; mov v0.16b, v2.16b +; mov v5.16b, v2.16b +; fmla v5.2d, v5.2d, v0.2d, v1.2d +; mov v0.16b, v5.16b ; ret function %f81(f32x2, f32x2) -> f32x2 { diff --git a/cranelift/filetests/filetests/isa/aarch64/reftypes.clif b/cranelift/filetests/filetests/isa/aarch64/reftypes.clif index 5a514f587d..49520a0cf6 100644 --- a/cranelift/filetests/filetests/isa/aarch64/reftypes.clif +++ b/cranelift/filetests/filetests/isa/aarch64/reftypes.clif @@ -67,15 +67,15 @@ block3(v7: r64, v8: r64): ; mov fp, sp ; sub sp, sp, #32 ; block0: -; str x1, [sp, #16] ; str x0, [sp, #8] -; ldr x1, 8 ; b 12 ; data TestCase(%f) + 0 -; blr x1 -; mov x3, sp +; str x1, [sp, #16] +; ldr x3, 8 ; b 12 ; data TestCase(%f) + 0 +; blr x3 +; mov x2, sp ; ldr x9, [sp, #8] -; str x9, [x3] -; and w4, w0, #1 -; cbz x4, label1 ; b label3 +; str x9, [x2] +; and w3, w0, #1 +; cbz x3, label1 ; b label3 ; block1: ; b label2 ; block2: @@ -89,8 +89,8 @@ block3(v7: r64, v8: r64): ; ldr x1, [sp, #16] ; b label5 ; block5: -; mov x5, sp -; ldr x2, [x5] +; mov x4, sp +; ldr x2, [x4] ; add sp, sp, #32 ; ldp fp, lr, [sp], #16 ; ret diff --git a/cranelift/filetests/filetests/isa/aarch64/simd-narrow.clif b/cranelift/filetests/filetests/isa/aarch64/simd-narrow.clif index 50b147adff..b5940c2d64 100644 --- a/cranelift/filetests/filetests/isa/aarch64/simd-narrow.clif +++ b/cranelift/filetests/filetests/isa/aarch64/simd-narrow.clif @@ -9,8 +9,9 @@ block0(v0: i16x4, v1: i16x4): } ; block0: -; mov v0.d[1], v0.d[1], v1.d[0] -; sqxtn v0.8b, v0.8h +; mov v4.16b, v0.16b +; mov v4.d[1], v4.d[1], v1.d[0] +; sqxtn v0.8b, v4.8h ; ret function %snarrow_i16x8(i16x8, i16x8) -> i8x16 { @@ -31,8 +32,9 @@ block0(v0: i32x2, v1: i32x2): } ; block0: -; mov v0.d[1], v0.d[1], v1.d[0] -; sqxtn v0.4h, v0.4s +; mov v4.16b, v0.16b +; mov v4.d[1], v4.d[1], v1.d[0] +; sqxtn v0.4h, v4.4s ; ret function %snarrow_i32x4(i32x4, i32x4) -> i16x8 { @@ -64,8 +66,9 @@ block0(v0: i16x4, v1: i16x4): } ; block0: -; mov v0.d[1], v0.d[1], v1.d[0] -; sqxtun v0.8b, v0.8h +; mov v4.16b, v0.16b +; mov v4.d[1], v4.d[1], v1.d[0] +; sqxtun v0.8b, v4.8h ; ret function %unarrow_i16x8(i16x8, i16x8) -> i8x16 { @@ -86,8 +89,9 @@ block0(v0: i32x2, v1: i32x2): } ; block0: -; mov v0.d[1], v0.d[1], v1.d[0] -; sqxtun v0.4h, v0.4s +; mov v4.16b, v0.16b +; mov v4.d[1], v4.d[1], v1.d[0] +; sqxtun v0.4h, v4.4s ; ret function %unarrow_i32x4(i32x4, i32x4) -> i16x8 { @@ -119,8 +123,9 @@ block0(v0: i16x4, v1: i16x4): } ; block0: -; mov v0.d[1], v0.d[1], v1.d[0] -; uqxtn v0.8b, v0.8h +; mov v4.16b, v0.16b +; mov v4.d[1], v4.d[1], v1.d[0] +; uqxtn v0.8b, v4.8h ; ret function %uunarrow_i16x8(i16x8, i16x8) -> i8x16 { @@ -141,8 +146,9 @@ block0(v0: i32x2, v1: i32x2): } ; block0: -; mov v0.d[1], v0.d[1], v1.d[0] -; uqxtn v0.4h, v0.4s +; mov v4.16b, v0.16b +; mov v4.d[1], v4.d[1], v1.d[0] +; uqxtn v0.4h, v4.4s ; ret function %uunarrow_i32x4(i32x4, i32x4) -> i16x8 { diff --git a/cranelift/filetests/filetests/isa/aarch64/tls-elf-gd.clif b/cranelift/filetests/filetests/isa/aarch64/tls-elf-gd.clif index 016a624507..d31db4da88 100644 --- a/cranelift/filetests/filetests/isa/aarch64/tls-elf-gd.clif +++ b/cranelift/filetests/filetests/isa/aarch64/tls-elf-gd.clif @@ -20,8 +20,9 @@ block0(v0: i32): ; block0: ; mov x25, x0 ; elf_tls_get_addr x0, userextname0 +; mov x7, x25 ; mov x1, x0 -; mov x0, x25 +; mov x0, x7 ; ldp d8, d9, [sp], #16 ; ldp d10, d11, [sp], #16 ; ldp d12, d13, [sp], #16 diff --git a/cranelift/filetests/filetests/isa/s390x/atomic_cas-little.clif b/cranelift/filetests/filetests/isa/s390x/atomic_cas-little.clif index 0e095ac4e4..1bdeea3ac1 100644 --- a/cranelift/filetests/filetests/isa/s390x/atomic_cas-little.clif +++ b/cranelift/filetests/filetests/isa/s390x/atomic_cas-little.clif @@ -41,12 +41,13 @@ block0(v0: i64, v1: i16, v2: i16, v3: i64): ; block0: ; lgr %r9, %r4 ; sllk %r4, %r5, 3 -; nill %r5, 65532 -; lrvr %r2, %r3 +; lgr %r2, %r5 +; nill %r2, 65532 +; lrvr %r5, %r3 ; lgr %r3, %r9 ; lrvr %r3, %r3 -; l %r0, 0(%r5) -; 0: rll %r1, %r0, 16(%r4) ; rxsbg %r1, %r2, 176, 64, 48 ; jglh 1f ; risbgn %r1, %r3, 48, 64, 48 ; rll %r1, %r1, 16(%r4) ; cs %r0, %r1, 0(%r5) ; jglh 0b ; 1: +; l %r0, 0(%r2) +; 0: rll %r1, %r0, 16(%r4) ; rxsbg %r1, %r5, 176, 64, 48 ; jglh 1f ; risbgn %r1, %r3, 48, 64, 48 ; rll %r1, %r1, 16(%r4) ; cs %r0, %r1, 0(%r2) ; jglh 0b ; 1: ; rll %r2, %r0, 0(%r4) ; lrvr %r2, %r2 ; lmg %r9, %r15, 72(%r15) @@ -62,10 +63,11 @@ block0(v0: i64, v1: i8, v2: i8, v3: i64): ; block0: ; lgr %r11, %r4 ; sllk %r4, %r5, 3 -; nill %r5, 65532 -; lcr %r2, %r4 -; l %r0, 0(%r5) -; 0: rll %r1, %r0, 0(%r4) ; rxsbg %r1, %r3, 160, 40, 24 ; jglh 1f ; risbgn %r1, %r11, 32, 40, 24 ; rll %r1, %r1, 0(%r2) ; cs %r0, %r1, 0(%r5) ; jglh 0b ; 1: +; lgr %r2, %r5 +; nill %r2, 65532 +; lcr %r5, %r4 +; l %r0, 0(%r2) +; 0: rll %r1, %r0, 0(%r4) ; rxsbg %r1, %r3, 160, 40, 24 ; jglh 1f ; risbgn %r1, %r11, 32, 40, 24 ; rll %r1, %r1, 0(%r5) ; cs %r0, %r1, 0(%r2) ; jglh 0b ; 1: ; rll %r2, %r0, 8(%r4) ; lmg %r11, %r15, 88(%r15) ; br %r14 diff --git a/cranelift/filetests/filetests/isa/s390x/atomic_cas.clif b/cranelift/filetests/filetests/isa/s390x/atomic_cas.clif index 05e9650c65..db516f8bb4 100644 --- a/cranelift/filetests/filetests/isa/s390x/atomic_cas.clif +++ b/cranelift/filetests/filetests/isa/s390x/atomic_cas.clif @@ -31,13 +31,17 @@ block0(v0: i64, v1: i16, v2: i16, v3: i64): return v4 } +; stmg %r6, %r15, 48(%r15) ; block0: -; lgr %r2, %r4 +; lgr %r6, %r4 ; sllk %r4, %r5, 3 -; nill %r5, 65532 -; l %r0, 0(%r5) -; 0: rll %r1, %r0, 0(%r4) ; rxsbg %r1, %r3, 160, 48, 16 ; jglh 1f ; risbgn %r1, %r2, 32, 48, 16 ; rll %r1, %r1, 0(%r4) ; cs %r0, %r1, 0(%r5) ; jglh 0b ; 1: +; lgr %r2, %r5 +; nill %r2, 65532 +; l %r0, 0(%r2) +; lgr %r5, %r6 +; 0: rll %r1, %r0, 0(%r4) ; rxsbg %r1, %r3, 160, 48, 16 ; jglh 1f ; risbgn %r1, %r5, 32, 48, 16 ; rll %r1, %r1, 0(%r4) ; cs %r0, %r1, 0(%r2) ; jglh 0b ; 1: ; rll %r2, %r0, 16(%r4) +; lmg %r6, %r15, 48(%r15) ; br %r14 function %atomic_cas_i8(i64, i8, i8, i64) -> i8 { @@ -50,10 +54,11 @@ block0(v0: i64, v1: i8, v2: i8, v3: i64): ; block0: ; lgr %r11, %r4 ; sllk %r4, %r5, 3 -; nill %r5, 65532 -; lcr %r2, %r4 -; l %r0, 0(%r5) -; 0: rll %r1, %r0, 0(%r4) ; rxsbg %r1, %r3, 160, 40, 24 ; jglh 1f ; risbgn %r1, %r11, 32, 40, 24 ; rll %r1, %r1, 0(%r2) ; cs %r0, %r1, 0(%r5) ; jglh 0b ; 1: +; lgr %r2, %r5 +; nill %r2, 65532 +; lcr %r5, %r4 +; l %r0, 0(%r2) +; 0: rll %r1, %r0, 0(%r4) ; rxsbg %r1, %r3, 160, 40, 24 ; jglh 1f ; risbgn %r1, %r11, 32, 40, 24 ; rll %r1, %r1, 0(%r5) ; cs %r0, %r1, 0(%r2) ; jglh 0b ; 1: ; rll %r2, %r0, 8(%r4) ; lmg %r11, %r15, 88(%r15) ; br %r14 diff --git a/cranelift/filetests/filetests/isa/s390x/atomic_rmw-arch13.clif b/cranelift/filetests/filetests/isa/s390x/atomic_rmw-arch13.clif index 498df64a16..3e80200734 100644 --- a/cranelift/filetests/filetests/isa/s390x/atomic_rmw-arch13.clif +++ b/cranelift/filetests/filetests/isa/s390x/atomic_rmw-arch13.clif @@ -32,12 +32,12 @@ block0(v0: i64, v1: i64, v2: i16): } ; block0: -; lgr %r5, %r4 -; sllk %r4, %r3, 3 -; nill %r3, 65532 -; l %r0, 0(%r3) -; 0: rll %r1, %r0, 0(%r4) ; rnsbg %r1, %r5, 32, 48, 16 ; xilf %r1, 4294901760 ; rll %r1, %r1, 0(%r4) ; cs %r0, %r1, 0(%r3) ; jglh 0b ; 1: -; rll %r2, %r0, 16(%r4) +; sllk %r2, %r3, 3 +; lgr %r5, %r3 +; nill %r5, 65532 +; l %r0, 0(%r5) +; 0: rll %r1, %r0, 0(%r2) ; rnsbg %r1, %r4, 32, 48, 16 ; xilf %r1, 4294901760 ; rll %r1, %r1, 0(%r2) ; cs %r0, %r1, 0(%r5) ; jglh 0b ; 1: +; rll %r2, %r0, 16(%r2) ; br %r14 function %atomic_rmw_nand_i8(i64, i64, i8) -> i8 { @@ -46,14 +46,16 @@ block0(v0: i64, v1: i64, v2: i8): return v3 } +; stmg %r11, %r15, 88(%r15) ; block0: -; lgr %r2, %r4 -; sllk %r4, %r3, 3 -; nill %r3, 65532 -; lcr %r5, %r4 -; l %r0, 0(%r3) -; 0: rll %r1, %r0, 0(%r4) ; rnsbg %r1, %r2, 32, 40, 24 ; xilf %r1, 4278190080 ; rll %r1, %r1, 0(%r5) ; cs %r0, %r1, 0(%r3) ; jglh 0b ; 1: -; rll %r2, %r0, 8(%r4) +; sllk %r11, %r3, 3 +; lgr %r5, %r3 +; nill %r5, 65532 +; lcr %r2, %r11 +; l %r0, 0(%r5) +; 0: rll %r1, %r0, 0(%r11) ; rnsbg %r1, %r4, 32, 40, 24 ; xilf %r1, 4278190080 ; rll %r1, %r1, 0(%r2) ; cs %r0, %r1, 0(%r5) ; jglh 0b ; 1: +; rll %r2, %r0, 8(%r11) +; lmg %r11, %r15, 88(%r15) ; br %r14 function %atomic_rmw_nand_i64(i64, i64, i64) -> i64 { @@ -89,13 +91,13 @@ block0(v0: i64, v1: i64, v2: i16): } ; block0: -; lgr %r2, %r4 -; sllk %r4, %r3, 3 -; nill %r3, 65532 -; lrvr %r5, %r2 -; l %r0, 0(%r3) -; 0: rll %r1, %r0, 16(%r4) ; rnsbg %r1, %r5, 48, 64, 48 ; xilf %r1, 65535 ; rll %r1, %r1, 16(%r4) ; cs %r0, %r1, 0(%r3) ; jglh 0b ; 1: -; rll %r3, %r0, 0(%r4) +; sllk %r2, %r3, 3 +; lgr %r5, %r3 +; nill %r5, 65532 +; lrvr %r4, %r4 +; l %r0, 0(%r5) +; 0: rll %r1, %r0, 16(%r2) ; rnsbg %r1, %r4, 48, 64, 48 ; xilf %r1, 65535 ; rll %r1, %r1, 16(%r2) ; cs %r0, %r1, 0(%r5) ; jglh 0b ; 1: +; rll %r3, %r0, 0(%r2) ; lrvr %r2, %r3 ; br %r14 @@ -105,13 +107,15 @@ block0(v0: i64, v1: i64, v2: i8): return v3 } +; stmg %r11, %r15, 88(%r15) ; block0: -; lgr %r2, %r4 -; sllk %r4, %r3, 3 -; nill %r3, 65532 -; lcr %r5, %r4 -; l %r0, 0(%r3) -; 0: rll %r1, %r0, 0(%r4) ; rnsbg %r1, %r2, 32, 40, 24 ; xilf %r1, 4278190080 ; rll %r1, %r1, 0(%r5) ; cs %r0, %r1, 0(%r3) ; jglh 0b ; 1: -; rll %r2, %r0, 8(%r4) +; sllk %r11, %r3, 3 +; lgr %r5, %r3 +; nill %r5, 65532 +; lcr %r2, %r11 +; l %r0, 0(%r5) +; 0: rll %r1, %r0, 0(%r11) ; rnsbg %r1, %r4, 32, 40, 24 ; xilf %r1, 4278190080 ; rll %r1, %r1, 0(%r2) ; cs %r0, %r1, 0(%r5) ; jglh 0b ; 1: +; rll %r2, %r0, 8(%r11) +; lmg %r11, %r15, 88(%r15) ; br %r14 diff --git a/cranelift/filetests/filetests/isa/s390x/atomic_rmw-little.clif b/cranelift/filetests/filetests/isa/s390x/atomic_rmw-little.clif index b7ca9939a0..64eb171ba5 100644 --- a/cranelift/filetests/filetests/isa/s390x/atomic_rmw-little.clif +++ b/cranelift/filetests/filetests/isa/s390x/atomic_rmw-little.clif @@ -38,13 +38,13 @@ block0(v0: i64, v1: i64, v2: i16): } ; block0: -; lgr %r2, %r4 -; sllk %r4, %r3, 3 -; nill %r3, 65532 -; lrvr %r5, %r2 -; l %r0, 0(%r3) -; 0: rll %r1, %r0, 16(%r4) ; risbgn %r1, %r5, 48, 64, 48 ; rll %r1, %r1, 16(%r4) ; cs %r0, %r1, 0(%r3) ; jglh 0b ; 1: -; rll %r3, %r0, 0(%r4) +; sllk %r2, %r3, 3 +; lgr %r5, %r3 +; nill %r5, 65532 +; lrvr %r4, %r4 +; l %r0, 0(%r5) +; 0: rll %r1, %r0, 16(%r2) ; risbgn %r1, %r4, 48, 64, 48 ; rll %r1, %r1, 16(%r2) ; cs %r0, %r1, 0(%r5) ; jglh 0b ; 1: +; rll %r3, %r0, 0(%r2) ; lrvr %r2, %r3 ; br %r14 @@ -54,14 +54,16 @@ block0(v0: i64, v1: i64, v2: i8): return v3 } +; stmg %r11, %r15, 88(%r15) ; block0: -; lgr %r2, %r4 -; sllk %r4, %r3, 3 -; nill %r3, 65532 -; lcr %r5, %r4 -; l %r0, 0(%r3) -; 0: rll %r1, %r0, 0(%r4) ; risbgn %r1, %r2, 32, 40, 24 ; rll %r1, %r1, 0(%r5) ; cs %r0, %r1, 0(%r3) ; jglh 0b ; 1: -; rll %r2, %r0, 8(%r4) +; sllk %r11, %r3, 3 +; lgr %r5, %r3 +; nill %r5, 65532 +; lcr %r2, %r11 +; l %r0, 0(%r5) +; 0: rll %r1, %r0, 0(%r11) ; risbgn %r1, %r4, 32, 40, 24 ; rll %r1, %r1, 0(%r2) ; cs %r0, %r1, 0(%r5) ; jglh 0b ; 1: +; rll %r2, %r0, 8(%r11) +; lmg %r11, %r15, 88(%r15) ; br %r14 function %atomic_rmw_add_i64(i64, i64, i64) -> i64 { @@ -95,13 +97,13 @@ block0(v0: i64, v1: i64, v2: i16): } ; block0: -; lgr %r2, %r4 -; sllk %r4, %r3, 3 -; nill %r3, 65532 -; sllk %r5, %r2, 16 -; l %r0, 0(%r3) -; 0: rll %r1, %r0, 16(%r4) ; lrvr %r1, %r1 ; ar %r1, %r5 ; lrvr %r1, %r1 ; rll %r1, %r1, 16(%r4) ; cs %r0, %r1, 0(%r3) ; jglh 0b ; 1: -; rll %r3, %r0, 0(%r4) +; sllk %r2, %r3, 3 +; lgr %r5, %r3 +; nill %r5, 65532 +; sllk %r4, %r4, 16 +; l %r0, 0(%r5) +; 0: rll %r1, %r0, 16(%r2) ; lrvr %r1, %r1 ; ar %r1, %r4 ; lrvr %r1, %r1 ; rll %r1, %r1, 16(%r2) ; cs %r0, %r1, 0(%r5) ; jglh 0b ; 1: +; rll %r3, %r0, 0(%r2) ; lrvr %r2, %r3 ; br %r14 @@ -111,14 +113,17 @@ block0(v0: i64, v1: i64, v2: i8): return v3 } +; stmg %r11, %r15, 88(%r15) ; block0: -; sllk %r2, %r3, 3 -; nill %r3, 65532 -; sllk %r5, %r4, 24 -; lcr %r4, %r2 -; l %r0, 0(%r3) -; 0: rll %r1, %r0, 0(%r2) ; ar %r1, %r5 ; rll %r1, %r1, 0(%r4) ; cs %r0, %r1, 0(%r3) ; jglh 0b ; 1: -; rll %r2, %r0, 8(%r2) +; sllk %r11, %r3, 3 +; lgr %r5, %r3 +; nill %r5, 65532 +; sllk %r4, %r4, 24 +; lcr %r2, %r11 +; l %r0, 0(%r5) +; 0: rll %r1, %r0, 0(%r11) ; ar %r1, %r4 ; rll %r1, %r1, 0(%r2) ; cs %r0, %r1, 0(%r5) ; jglh 0b ; 1: +; rll %r2, %r0, 8(%r11) +; lmg %r11, %r15, 88(%r15) ; br %r14 function %atomic_rmw_sub_i64(i64, i64, i64) -> i64 { @@ -152,13 +157,13 @@ block0(v0: i64, v1: i64, v2: i16): } ; block0: -; lgr %r2, %r4 -; sllk %r4, %r3, 3 -; nill %r3, 65532 -; sllk %r5, %r2, 16 -; l %r0, 0(%r3) -; 0: rll %r1, %r0, 16(%r4) ; lrvr %r1, %r1 ; sr %r1, %r5 ; lrvr %r1, %r1 ; rll %r1, %r1, 16(%r4) ; cs %r0, %r1, 0(%r3) ; jglh 0b ; 1: -; rll %r3, %r0, 0(%r4) +; sllk %r2, %r3, 3 +; lgr %r5, %r3 +; nill %r5, 65532 +; sllk %r4, %r4, 16 +; l %r0, 0(%r5) +; 0: rll %r1, %r0, 16(%r2) ; lrvr %r1, %r1 ; sr %r1, %r4 ; lrvr %r1, %r1 ; rll %r1, %r1, 16(%r2) ; cs %r0, %r1, 0(%r5) ; jglh 0b ; 1: +; rll %r3, %r0, 0(%r2) ; lrvr %r2, %r3 ; br %r14 @@ -168,14 +173,17 @@ block0(v0: i64, v1: i64, v2: i8): return v3 } +; stmg %r11, %r15, 88(%r15) ; block0: -; sllk %r2, %r3, 3 -; nill %r3, 65532 -; sllk %r5, %r4, 24 -; lcr %r4, %r2 -; l %r0, 0(%r3) -; 0: rll %r1, %r0, 0(%r2) ; sr %r1, %r5 ; rll %r1, %r1, 0(%r4) ; cs %r0, %r1, 0(%r3) ; jglh 0b ; 1: -; rll %r2, %r0, 8(%r2) +; sllk %r11, %r3, 3 +; lgr %r5, %r3 +; nill %r5, 65532 +; sllk %r4, %r4, 24 +; lcr %r2, %r11 +; l %r0, 0(%r5) +; 0: rll %r1, %r0, 0(%r11) ; sr %r1, %r4 ; rll %r1, %r1, 0(%r2) ; cs %r0, %r1, 0(%r5) ; jglh 0b ; 1: +; rll %r2, %r0, 8(%r11) +; lmg %r11, %r15, 88(%r15) ; br %r14 function %atomic_rmw_and_i64(i64, i64, i64) -> i64 { @@ -209,13 +217,13 @@ block0(v0: i64, v1: i64, v2: i16): } ; block0: -; lgr %r2, %r4 -; sllk %r4, %r3, 3 -; nill %r3, 65532 -; lrvr %r5, %r2 -; l %r0, 0(%r3) -; 0: rll %r1, %r0, 16(%r4) ; rnsbg %r1, %r5, 48, 64, 48 ; rll %r1, %r1, 16(%r4) ; cs %r0, %r1, 0(%r3) ; jglh 0b ; 1: -; rll %r3, %r0, 0(%r4) +; sllk %r2, %r3, 3 +; lgr %r5, %r3 +; nill %r5, 65532 +; lrvr %r4, %r4 +; l %r0, 0(%r5) +; 0: rll %r1, %r0, 16(%r2) ; rnsbg %r1, %r4, 48, 64, 48 ; rll %r1, %r1, 16(%r2) ; cs %r0, %r1, 0(%r5) ; jglh 0b ; 1: +; rll %r3, %r0, 0(%r2) ; lrvr %r2, %r3 ; br %r14 @@ -225,14 +233,16 @@ block0(v0: i64, v1: i64, v2: i8): return v3 } +; stmg %r11, %r15, 88(%r15) ; block0: -; lgr %r2, %r4 -; sllk %r4, %r3, 3 -; nill %r3, 65532 -; lcr %r5, %r4 -; l %r0, 0(%r3) -; 0: rll %r1, %r0, 0(%r4) ; rnsbg %r1, %r2, 32, 40, 24 ; rll %r1, %r1, 0(%r5) ; cs %r0, %r1, 0(%r3) ; jglh 0b ; 1: -; rll %r2, %r0, 8(%r4) +; sllk %r11, %r3, 3 +; lgr %r5, %r3 +; nill %r5, 65532 +; lcr %r2, %r11 +; l %r0, 0(%r5) +; 0: rll %r1, %r0, 0(%r11) ; rnsbg %r1, %r4, 32, 40, 24 ; rll %r1, %r1, 0(%r2) ; cs %r0, %r1, 0(%r5) ; jglh 0b ; 1: +; rll %r2, %r0, 8(%r11) +; lmg %r11, %r15, 88(%r15) ; br %r14 function %atomic_rmw_or_i64(i64, i64, i64) -> i64 { @@ -266,13 +276,13 @@ block0(v0: i64, v1: i64, v2: i16): } ; block0: -; lgr %r2, %r4 -; sllk %r4, %r3, 3 -; nill %r3, 65532 -; lrvr %r5, %r2 -; l %r0, 0(%r3) -; 0: rll %r1, %r0, 16(%r4) ; rosbg %r1, %r5, 48, 64, 48 ; rll %r1, %r1, 16(%r4) ; cs %r0, %r1, 0(%r3) ; jglh 0b ; 1: -; rll %r3, %r0, 0(%r4) +; sllk %r2, %r3, 3 +; lgr %r5, %r3 +; nill %r5, 65532 +; lrvr %r4, %r4 +; l %r0, 0(%r5) +; 0: rll %r1, %r0, 16(%r2) ; rosbg %r1, %r4, 48, 64, 48 ; rll %r1, %r1, 16(%r2) ; cs %r0, %r1, 0(%r5) ; jglh 0b ; 1: +; rll %r3, %r0, 0(%r2) ; lrvr %r2, %r3 ; br %r14 @@ -282,14 +292,16 @@ block0(v0: i64, v1: i64, v2: i8): return v3 } +; stmg %r11, %r15, 88(%r15) ; block0: -; lgr %r2, %r4 -; sllk %r4, %r3, 3 -; nill %r3, 65532 -; lcr %r5, %r4 -; l %r0, 0(%r3) -; 0: rll %r1, %r0, 0(%r4) ; rosbg %r1, %r2, 32, 40, 24 ; rll %r1, %r1, 0(%r5) ; cs %r0, %r1, 0(%r3) ; jglh 0b ; 1: -; rll %r2, %r0, 8(%r4) +; sllk %r11, %r3, 3 +; lgr %r5, %r3 +; nill %r5, 65532 +; lcr %r2, %r11 +; l %r0, 0(%r5) +; 0: rll %r1, %r0, 0(%r11) ; rosbg %r1, %r4, 32, 40, 24 ; rll %r1, %r1, 0(%r2) ; cs %r0, %r1, 0(%r5) ; jglh 0b ; 1: +; rll %r2, %r0, 8(%r11) +; lmg %r11, %r15, 88(%r15) ; br %r14 function %atomic_rmw_xor_i64(i64, i64, i64) -> i64 { @@ -323,13 +335,13 @@ block0(v0: i64, v1: i64, v2: i16): } ; block0: -; lgr %r2, %r4 -; sllk %r4, %r3, 3 -; nill %r3, 65532 -; lrvr %r5, %r2 -; l %r0, 0(%r3) -; 0: rll %r1, %r0, 16(%r4) ; rxsbg %r1, %r5, 48, 64, 48 ; rll %r1, %r1, 16(%r4) ; cs %r0, %r1, 0(%r3) ; jglh 0b ; 1: -; rll %r3, %r0, 0(%r4) +; sllk %r2, %r3, 3 +; lgr %r5, %r3 +; nill %r5, 65532 +; lrvr %r4, %r4 +; l %r0, 0(%r5) +; 0: rll %r1, %r0, 16(%r2) ; rxsbg %r1, %r4, 48, 64, 48 ; rll %r1, %r1, 16(%r2) ; cs %r0, %r1, 0(%r5) ; jglh 0b ; 1: +; rll %r3, %r0, 0(%r2) ; lrvr %r2, %r3 ; br %r14 @@ -339,14 +351,16 @@ block0(v0: i64, v1: i64, v2: i8): return v3 } +; stmg %r11, %r15, 88(%r15) ; block0: -; lgr %r2, %r4 -; sllk %r4, %r3, 3 -; nill %r3, 65532 -; lcr %r5, %r4 -; l %r0, 0(%r3) -; 0: rll %r1, %r0, 0(%r4) ; rxsbg %r1, %r2, 32, 40, 24 ; rll %r1, %r1, 0(%r5) ; cs %r0, %r1, 0(%r3) ; jglh 0b ; 1: -; rll %r2, %r0, 8(%r4) +; sllk %r11, %r3, 3 +; lgr %r5, %r3 +; nill %r5, 65532 +; lcr %r2, %r11 +; l %r0, 0(%r5) +; 0: rll %r1, %r0, 0(%r11) ; rxsbg %r1, %r4, 32, 40, 24 ; rll %r1, %r1, 0(%r2) ; cs %r0, %r1, 0(%r5) ; jglh 0b ; 1: +; rll %r2, %r0, 8(%r11) +; lmg %r11, %r15, 88(%r15) ; br %r14 function %atomic_rmw_nand_i64(i64, i64, i64) -> i64 { @@ -382,13 +396,13 @@ block0(v0: i64, v1: i64, v2: i16): } ; block0: -; lgr %r2, %r4 -; sllk %r4, %r3, 3 -; nill %r3, 65532 -; lrvr %r5, %r2 -; l %r0, 0(%r3) -; 0: rll %r1, %r0, 16(%r4) ; rnsbg %r1, %r5, 48, 64, 48 ; xilf %r1, 65535 ; rll %r1, %r1, 16(%r4) ; cs %r0, %r1, 0(%r3) ; jglh 0b ; 1: -; rll %r3, %r0, 0(%r4) +; sllk %r2, %r3, 3 +; lgr %r5, %r3 +; nill %r5, 65532 +; lrvr %r4, %r4 +; l %r0, 0(%r5) +; 0: rll %r1, %r0, 16(%r2) ; rnsbg %r1, %r4, 48, 64, 48 ; xilf %r1, 65535 ; rll %r1, %r1, 16(%r2) ; cs %r0, %r1, 0(%r5) ; jglh 0b ; 1: +; rll %r3, %r0, 0(%r2) ; lrvr %r2, %r3 ; br %r14 @@ -398,14 +412,16 @@ block0(v0: i64, v1: i64, v2: i8): return v3 } +; stmg %r11, %r15, 88(%r15) ; block0: -; lgr %r2, %r4 -; sllk %r4, %r3, 3 -; nill %r3, 65532 -; lcr %r5, %r4 -; l %r0, 0(%r3) -; 0: rll %r1, %r0, 0(%r4) ; rnsbg %r1, %r2, 32, 40, 24 ; xilf %r1, 4278190080 ; rll %r1, %r1, 0(%r5) ; cs %r0, %r1, 0(%r3) ; jglh 0b ; 1: -; rll %r2, %r0, 8(%r4) +; sllk %r11, %r3, 3 +; lgr %r5, %r3 +; nill %r5, 65532 +; lcr %r2, %r11 +; l %r0, 0(%r5) +; 0: rll %r1, %r0, 0(%r11) ; rnsbg %r1, %r4, 32, 40, 24 ; xilf %r1, 4278190080 ; rll %r1, %r1, 0(%r2) ; cs %r0, %r1, 0(%r5) ; jglh 0b ; 1: +; rll %r2, %r0, 8(%r11) +; lmg %r11, %r15, 88(%r15) ; br %r14 function %atomic_rmw_smin_i64(i64, i64, i64) -> i64 { @@ -439,13 +455,13 @@ block0(v0: i64, v1: i64, v2: i16): } ; block0: -; lgr %r2, %r4 -; sllk %r4, %r3, 3 -; nill %r3, 65532 -; sllk %r5, %r2, 16 -; l %r0, 0(%r3) -; 0: rll %r1, %r0, 16(%r4) ; lrvr %r1, %r1 ; cr %r5, %r1 ; jgnl 1f ; risbgn %r1, %r5, 32, 48, 0 ; lrvr %r1, %r1 ; rll %r1, %r1, 16(%r4) ; cs %r0, %r1, 0(%r3) ; jglh 0b ; 1: -; rll %r3, %r0, 0(%r4) +; sllk %r2, %r3, 3 +; lgr %r5, %r3 +; nill %r5, 65532 +; sllk %r4, %r4, 16 +; l %r0, 0(%r5) +; 0: rll %r1, %r0, 16(%r2) ; lrvr %r1, %r1 ; cr %r4, %r1 ; jgnl 1f ; risbgn %r1, %r4, 32, 48, 0 ; lrvr %r1, %r1 ; rll %r1, %r1, 16(%r2) ; cs %r0, %r1, 0(%r5) ; jglh 0b ; 1: +; rll %r3, %r0, 0(%r2) ; lrvr %r2, %r3 ; br %r14 @@ -455,14 +471,17 @@ block0(v0: i64, v1: i64, v2: i8): return v3 } +; stmg %r11, %r15, 88(%r15) ; block0: -; sllk %r2, %r3, 3 -; nill %r3, 65532 -; sllk %r5, %r4, 24 -; lcr %r4, %r2 -; l %r0, 0(%r3) -; 0: rll %r1, %r0, 0(%r2) ; cr %r5, %r1 ; jgnl 1f ; risbgn %r1, %r5, 32, 40, 0 ; rll %r1, %r1, 0(%r4) ; cs %r0, %r1, 0(%r3) ; jglh 0b ; 1: -; rll %r2, %r0, 8(%r2) +; sllk %r11, %r3, 3 +; lgr %r5, %r3 +; nill %r5, 65532 +; sllk %r4, %r4, 24 +; lcr %r2, %r11 +; l %r0, 0(%r5) +; 0: rll %r1, %r0, 0(%r11) ; cr %r4, %r1 ; jgnl 1f ; risbgn %r1, %r4, 32, 40, 0 ; rll %r1, %r1, 0(%r2) ; cs %r0, %r1, 0(%r5) ; jglh 0b ; 1: +; rll %r2, %r0, 8(%r11) +; lmg %r11, %r15, 88(%r15) ; br %r14 function %atomic_rmw_smax_i64(i64, i64, i64) -> i64 { @@ -496,13 +515,13 @@ block0(v0: i64, v1: i64, v2: i16): } ; block0: -; lgr %r2, %r4 -; sllk %r4, %r3, 3 -; nill %r3, 65532 -; sllk %r5, %r2, 16 -; l %r0, 0(%r3) -; 0: rll %r1, %r0, 16(%r4) ; lrvr %r1, %r1 ; cr %r5, %r1 ; jgnh 1f ; risbgn %r1, %r5, 32, 48, 0 ; lrvr %r1, %r1 ; rll %r1, %r1, 16(%r4) ; cs %r0, %r1, 0(%r3) ; jglh 0b ; 1: -; rll %r3, %r0, 0(%r4) +; sllk %r2, %r3, 3 +; lgr %r5, %r3 +; nill %r5, 65532 +; sllk %r4, %r4, 16 +; l %r0, 0(%r5) +; 0: rll %r1, %r0, 16(%r2) ; lrvr %r1, %r1 ; cr %r4, %r1 ; jgnh 1f ; risbgn %r1, %r4, 32, 48, 0 ; lrvr %r1, %r1 ; rll %r1, %r1, 16(%r2) ; cs %r0, %r1, 0(%r5) ; jglh 0b ; 1: +; rll %r3, %r0, 0(%r2) ; lrvr %r2, %r3 ; br %r14 @@ -512,14 +531,17 @@ block0(v0: i64, v1: i64, v2: i8): return v3 } +; stmg %r11, %r15, 88(%r15) ; block0: -; sllk %r2, %r3, 3 -; nill %r3, 65532 -; sllk %r5, %r4, 24 -; lcr %r4, %r2 -; l %r0, 0(%r3) -; 0: rll %r1, %r0, 0(%r2) ; cr %r5, %r1 ; jgnh 1f ; risbgn %r1, %r5, 32, 40, 0 ; rll %r1, %r1, 0(%r4) ; cs %r0, %r1, 0(%r3) ; jglh 0b ; 1: -; rll %r2, %r0, 8(%r2) +; sllk %r11, %r3, 3 +; lgr %r5, %r3 +; nill %r5, 65532 +; sllk %r4, %r4, 24 +; lcr %r2, %r11 +; l %r0, 0(%r5) +; 0: rll %r1, %r0, 0(%r11) ; cr %r4, %r1 ; jgnh 1f ; risbgn %r1, %r4, 32, 40, 0 ; rll %r1, %r1, 0(%r2) ; cs %r0, %r1, 0(%r5) ; jglh 0b ; 1: +; rll %r2, %r0, 8(%r11) +; lmg %r11, %r15, 88(%r15) ; br %r14 function %atomic_rmw_umin_i64(i64, i64, i64) -> i64 { @@ -553,13 +575,13 @@ block0(v0: i64, v1: i64, v2: i16): } ; block0: -; lgr %r2, %r4 -; sllk %r4, %r3, 3 -; nill %r3, 65532 -; sllk %r5, %r2, 16 -; l %r0, 0(%r3) -; 0: rll %r1, %r0, 16(%r4) ; lrvr %r1, %r1 ; clr %r5, %r1 ; jgnl 1f ; risbgn %r1, %r5, 32, 48, 0 ; lrvr %r1, %r1 ; rll %r1, %r1, 16(%r4) ; cs %r0, %r1, 0(%r3) ; jglh 0b ; 1: -; rll %r3, %r0, 0(%r4) +; sllk %r2, %r3, 3 +; lgr %r5, %r3 +; nill %r5, 65532 +; sllk %r4, %r4, 16 +; l %r0, 0(%r5) +; 0: rll %r1, %r0, 16(%r2) ; lrvr %r1, %r1 ; clr %r4, %r1 ; jgnl 1f ; risbgn %r1, %r4, 32, 48, 0 ; lrvr %r1, %r1 ; rll %r1, %r1, 16(%r2) ; cs %r0, %r1, 0(%r5) ; jglh 0b ; 1: +; rll %r3, %r0, 0(%r2) ; lrvr %r2, %r3 ; br %r14 @@ -569,14 +591,17 @@ block0(v0: i64, v1: i64, v2: i8): return v3 } +; stmg %r11, %r15, 88(%r15) ; block0: -; sllk %r2, %r3, 3 -; nill %r3, 65532 -; sllk %r5, %r4, 24 -; lcr %r4, %r2 -; l %r0, 0(%r3) -; 0: rll %r1, %r0, 0(%r2) ; clr %r5, %r1 ; jgnl 1f ; risbgn %r1, %r5, 32, 40, 0 ; rll %r1, %r1, 0(%r4) ; cs %r0, %r1, 0(%r3) ; jglh 0b ; 1: -; rll %r2, %r0, 8(%r2) +; sllk %r11, %r3, 3 +; lgr %r5, %r3 +; nill %r5, 65532 +; sllk %r4, %r4, 24 +; lcr %r2, %r11 +; l %r0, 0(%r5) +; 0: rll %r1, %r0, 0(%r11) ; clr %r4, %r1 ; jgnl 1f ; risbgn %r1, %r4, 32, 40, 0 ; rll %r1, %r1, 0(%r2) ; cs %r0, %r1, 0(%r5) ; jglh 0b ; 1: +; rll %r2, %r0, 8(%r11) +; lmg %r11, %r15, 88(%r15) ; br %r14 function %atomic_rmw_umax_i64(i64, i64, i64) -> i64 { @@ -610,13 +635,13 @@ block0(v0: i64, v1: i64, v2: i16): } ; block0: -; lgr %r2, %r4 -; sllk %r4, %r3, 3 -; nill %r3, 65532 -; sllk %r5, %r2, 16 -; l %r0, 0(%r3) -; 0: rll %r1, %r0, 16(%r4) ; lrvr %r1, %r1 ; clr %r5, %r1 ; jgnh 1f ; risbgn %r1, %r5, 32, 48, 0 ; lrvr %r1, %r1 ; rll %r1, %r1, 16(%r4) ; cs %r0, %r1, 0(%r3) ; jglh 0b ; 1: -; rll %r3, %r0, 0(%r4) +; sllk %r2, %r3, 3 +; lgr %r5, %r3 +; nill %r5, 65532 +; sllk %r4, %r4, 16 +; l %r0, 0(%r5) +; 0: rll %r1, %r0, 16(%r2) ; lrvr %r1, %r1 ; clr %r4, %r1 ; jgnh 1f ; risbgn %r1, %r4, 32, 48, 0 ; lrvr %r1, %r1 ; rll %r1, %r1, 16(%r2) ; cs %r0, %r1, 0(%r5) ; jglh 0b ; 1: +; rll %r3, %r0, 0(%r2) ; lrvr %r2, %r3 ; br %r14 @@ -626,13 +651,16 @@ block0(v0: i64, v1: i64, v2: i8): return v3 } +; stmg %r11, %r15, 88(%r15) ; block0: -; sllk %r2, %r3, 3 -; nill %r3, 65532 -; sllk %r5, %r4, 24 -; lcr %r4, %r2 -; l %r0, 0(%r3) -; 0: rll %r1, %r0, 0(%r2) ; clr %r5, %r1 ; jgnh 1f ; risbgn %r1, %r5, 32, 40, 0 ; rll %r1, %r1, 0(%r4) ; cs %r0, %r1, 0(%r3) ; jglh 0b ; 1: -; rll %r2, %r0, 8(%r2) +; sllk %r11, %r3, 3 +; lgr %r5, %r3 +; nill %r5, 65532 +; sllk %r4, %r4, 24 +; lcr %r2, %r11 +; l %r0, 0(%r5) +; 0: rll %r1, %r0, 0(%r11) ; clr %r4, %r1 ; jgnh 1f ; risbgn %r1, %r4, 32, 40, 0 ; rll %r1, %r1, 0(%r2) ; cs %r0, %r1, 0(%r5) ; jglh 0b ; 1: +; rll %r2, %r0, 8(%r11) +; lmg %r11, %r15, 88(%r15) ; br %r14 diff --git a/cranelift/filetests/filetests/isa/s390x/atomic_rmw.clif b/cranelift/filetests/filetests/isa/s390x/atomic_rmw.clif index f1bf6f23dc..f95940aae7 100644 --- a/cranelift/filetests/filetests/isa/s390x/atomic_rmw.clif +++ b/cranelift/filetests/filetests/isa/s390x/atomic_rmw.clif @@ -36,12 +36,12 @@ block0(v0: i64, v1: i64, v2: i16): } ; block0: -; lgr %r5, %r4 -; sllk %r4, %r3, 3 -; nill %r3, 65532 -; l %r0, 0(%r3) -; 0: rll %r1, %r0, 0(%r4) ; risbgn %r1, %r5, 32, 48, 16 ; rll %r1, %r1, 0(%r4) ; cs %r0, %r1, 0(%r3) ; jglh 0b ; 1: -; rll %r2, %r0, 16(%r4) +; sllk %r2, %r3, 3 +; lgr %r5, %r3 +; nill %r5, 65532 +; l %r0, 0(%r5) +; 0: rll %r1, %r0, 0(%r2) ; risbgn %r1, %r4, 32, 48, 16 ; rll %r1, %r1, 0(%r2) ; cs %r0, %r1, 0(%r5) ; jglh 0b ; 1: +; rll %r2, %r0, 16(%r2) ; br %r14 function %atomic_rmw_xchg_i8(i64, i64, i8) -> i8 { @@ -50,14 +50,16 @@ block0(v0: i64, v1: i64, v2: i8): return v3 } +; stmg %r11, %r15, 88(%r15) ; block0: -; lgr %r2, %r4 -; sllk %r4, %r3, 3 -; nill %r3, 65532 -; lcr %r5, %r4 -; l %r0, 0(%r3) -; 0: rll %r1, %r0, 0(%r4) ; risbgn %r1, %r2, 32, 40, 24 ; rll %r1, %r1, 0(%r5) ; cs %r0, %r1, 0(%r3) ; jglh 0b ; 1: -; rll %r2, %r0, 8(%r4) +; sllk %r11, %r3, 3 +; lgr %r5, %r3 +; nill %r5, 65532 +; lcr %r2, %r11 +; l %r0, 0(%r5) +; 0: rll %r1, %r0, 0(%r11) ; risbgn %r1, %r4, 32, 40, 24 ; rll %r1, %r1, 0(%r2) ; cs %r0, %r1, 0(%r5) ; jglh 0b ; 1: +; rll %r2, %r0, 8(%r11) +; lmg %r11, %r15, 88(%r15) ; br %r14 function %atomic_rmw_add_i64(i64, i64) -> i64 { @@ -87,13 +89,13 @@ block0(v0: i64, v1: i64, v2: i16): } ; block0: -; lgr %r5, %r4 -; sllk %r4, %r3, 3 -; nill %r3, 65532 -; sllk %r5, %r5, 16 -; l %r0, 0(%r3) -; 0: rll %r1, %r0, 0(%r4) ; ar %r1, %r5 ; rll %r1, %r1, 0(%r4) ; cs %r0, %r1, 0(%r3) ; jglh 0b ; 1: -; rll %r2, %r0, 16(%r4) +; sllk %r2, %r3, 3 +; lgr %r5, %r3 +; nill %r5, 65532 +; sllk %r4, %r4, 16 +; l %r0, 0(%r5) +; 0: rll %r1, %r0, 0(%r2) ; ar %r1, %r4 ; rll %r1, %r1, 0(%r2) ; cs %r0, %r1, 0(%r5) ; jglh 0b ; 1: +; rll %r2, %r0, 16(%r2) ; br %r14 function %atomic_rmw_add_i8(i64, i64, i8) -> i8 { @@ -102,14 +104,17 @@ block0(v0: i64, v1: i64, v2: i8): return v3 } +; stmg %r11, %r15, 88(%r15) ; block0: -; sllk %r2, %r3, 3 -; nill %r3, 65532 -; sllk %r5, %r4, 24 -; lcr %r4, %r2 -; l %r0, 0(%r3) -; 0: rll %r1, %r0, 0(%r2) ; ar %r1, %r5 ; rll %r1, %r1, 0(%r4) ; cs %r0, %r1, 0(%r3) ; jglh 0b ; 1: -; rll %r2, %r0, 8(%r2) +; sllk %r11, %r3, 3 +; lgr %r5, %r3 +; nill %r5, 65532 +; sllk %r4, %r4, 24 +; lcr %r2, %r11 +; l %r0, 0(%r5) +; 0: rll %r1, %r0, 0(%r11) ; ar %r1, %r4 ; rll %r1, %r1, 0(%r2) ; cs %r0, %r1, 0(%r5) ; jglh 0b ; 1: +; rll %r2, %r0, 8(%r11) +; lmg %r11, %r15, 88(%r15) ; br %r14 function %atomic_rmw_sub_i64(i64, i64) -> i64 { @@ -141,13 +146,13 @@ block0(v0: i64, v1: i64, v2: i16): } ; block0: -; lgr %r5, %r4 -; sllk %r4, %r3, 3 -; nill %r3, 65532 -; sllk %r5, %r5, 16 -; l %r0, 0(%r3) -; 0: rll %r1, %r0, 0(%r4) ; sr %r1, %r5 ; rll %r1, %r1, 0(%r4) ; cs %r0, %r1, 0(%r3) ; jglh 0b ; 1: -; rll %r2, %r0, 16(%r4) +; sllk %r2, %r3, 3 +; lgr %r5, %r3 +; nill %r5, 65532 +; sllk %r4, %r4, 16 +; l %r0, 0(%r5) +; 0: rll %r1, %r0, 0(%r2) ; sr %r1, %r4 ; rll %r1, %r1, 0(%r2) ; cs %r0, %r1, 0(%r5) ; jglh 0b ; 1: +; rll %r2, %r0, 16(%r2) ; br %r14 function %atomic_rmw_sub_i8(i64, i64, i8) -> i8 { @@ -156,14 +161,17 @@ block0(v0: i64, v1: i64, v2: i8): return v3 } +; stmg %r11, %r15, 88(%r15) ; block0: -; sllk %r2, %r3, 3 -; nill %r3, 65532 -; sllk %r5, %r4, 24 -; lcr %r4, %r2 -; l %r0, 0(%r3) -; 0: rll %r1, %r0, 0(%r2) ; sr %r1, %r5 ; rll %r1, %r1, 0(%r4) ; cs %r0, %r1, 0(%r3) ; jglh 0b ; 1: -; rll %r2, %r0, 8(%r2) +; sllk %r11, %r3, 3 +; lgr %r5, %r3 +; nill %r5, 65532 +; sllk %r4, %r4, 24 +; lcr %r2, %r11 +; l %r0, 0(%r5) +; 0: rll %r1, %r0, 0(%r11) ; sr %r1, %r4 ; rll %r1, %r1, 0(%r2) ; cs %r0, %r1, 0(%r5) ; jglh 0b ; 1: +; rll %r2, %r0, 8(%r11) +; lmg %r11, %r15, 88(%r15) ; br %r14 function %atomic_rmw_and_i64(i64, i64) -> i64 { @@ -193,12 +201,12 @@ block0(v0: i64, v1: i64, v2: i16): } ; block0: -; lgr %r5, %r4 -; sllk %r4, %r3, 3 -; nill %r3, 65532 -; l %r0, 0(%r3) -; 0: rll %r1, %r0, 0(%r4) ; rnsbg %r1, %r5, 32, 48, 16 ; rll %r1, %r1, 0(%r4) ; cs %r0, %r1, 0(%r3) ; jglh 0b ; 1: -; rll %r2, %r0, 16(%r4) +; sllk %r2, %r3, 3 +; lgr %r5, %r3 +; nill %r5, 65532 +; l %r0, 0(%r5) +; 0: rll %r1, %r0, 0(%r2) ; rnsbg %r1, %r4, 32, 48, 16 ; rll %r1, %r1, 0(%r2) ; cs %r0, %r1, 0(%r5) ; jglh 0b ; 1: +; rll %r2, %r0, 16(%r2) ; br %r14 function %atomic_rmw_and_i8(i64, i64, i8) -> i8 { @@ -207,14 +215,16 @@ block0(v0: i64, v1: i64, v2: i8): return v3 } +; stmg %r11, %r15, 88(%r15) ; block0: -; lgr %r2, %r4 -; sllk %r4, %r3, 3 -; nill %r3, 65532 -; lcr %r5, %r4 -; l %r0, 0(%r3) -; 0: rll %r1, %r0, 0(%r4) ; rnsbg %r1, %r2, 32, 40, 24 ; rll %r1, %r1, 0(%r5) ; cs %r0, %r1, 0(%r3) ; jglh 0b ; 1: -; rll %r2, %r0, 8(%r4) +; sllk %r11, %r3, 3 +; lgr %r5, %r3 +; nill %r5, 65532 +; lcr %r2, %r11 +; l %r0, 0(%r5) +; 0: rll %r1, %r0, 0(%r11) ; rnsbg %r1, %r4, 32, 40, 24 ; rll %r1, %r1, 0(%r2) ; cs %r0, %r1, 0(%r5) ; jglh 0b ; 1: +; rll %r2, %r0, 8(%r11) +; lmg %r11, %r15, 88(%r15) ; br %r14 function %atomic_rmw_or_i64(i64, i64) -> i64 { @@ -244,12 +254,12 @@ block0(v0: i64, v1: i64, v2: i16): } ; block0: -; lgr %r5, %r4 -; sllk %r4, %r3, 3 -; nill %r3, 65532 -; l %r0, 0(%r3) -; 0: rll %r1, %r0, 0(%r4) ; rosbg %r1, %r5, 32, 48, 16 ; rll %r1, %r1, 0(%r4) ; cs %r0, %r1, 0(%r3) ; jglh 0b ; 1: -; rll %r2, %r0, 16(%r4) +; sllk %r2, %r3, 3 +; lgr %r5, %r3 +; nill %r5, 65532 +; l %r0, 0(%r5) +; 0: rll %r1, %r0, 0(%r2) ; rosbg %r1, %r4, 32, 48, 16 ; rll %r1, %r1, 0(%r2) ; cs %r0, %r1, 0(%r5) ; jglh 0b ; 1: +; rll %r2, %r0, 16(%r2) ; br %r14 function %atomic_rmw_or_i8(i64, i64, i8) -> i8 { @@ -258,14 +268,16 @@ block0(v0: i64, v1: i64, v2: i8): return v3 } +; stmg %r11, %r15, 88(%r15) ; block0: -; lgr %r2, %r4 -; sllk %r4, %r3, 3 -; nill %r3, 65532 -; lcr %r5, %r4 -; l %r0, 0(%r3) -; 0: rll %r1, %r0, 0(%r4) ; rosbg %r1, %r2, 32, 40, 24 ; rll %r1, %r1, 0(%r5) ; cs %r0, %r1, 0(%r3) ; jglh 0b ; 1: -; rll %r2, %r0, 8(%r4) +; sllk %r11, %r3, 3 +; lgr %r5, %r3 +; nill %r5, 65532 +; lcr %r2, %r11 +; l %r0, 0(%r5) +; 0: rll %r1, %r0, 0(%r11) ; rosbg %r1, %r4, 32, 40, 24 ; rll %r1, %r1, 0(%r2) ; cs %r0, %r1, 0(%r5) ; jglh 0b ; 1: +; rll %r2, %r0, 8(%r11) +; lmg %r11, %r15, 88(%r15) ; br %r14 function %atomic_rmw_xor_i64(i64, i64) -> i64 { @@ -295,12 +307,12 @@ block0(v0: i64, v1: i64, v2: i16): } ; block0: -; lgr %r5, %r4 -; sllk %r4, %r3, 3 -; nill %r3, 65532 -; l %r0, 0(%r3) -; 0: rll %r1, %r0, 0(%r4) ; rxsbg %r1, %r5, 32, 48, 16 ; rll %r1, %r1, 0(%r4) ; cs %r0, %r1, 0(%r3) ; jglh 0b ; 1: -; rll %r2, %r0, 16(%r4) +; sllk %r2, %r3, 3 +; lgr %r5, %r3 +; nill %r5, 65532 +; l %r0, 0(%r5) +; 0: rll %r1, %r0, 0(%r2) ; rxsbg %r1, %r4, 32, 48, 16 ; rll %r1, %r1, 0(%r2) ; cs %r0, %r1, 0(%r5) ; jglh 0b ; 1: +; rll %r2, %r0, 16(%r2) ; br %r14 function %atomic_rmw_xor_i8(i64, i64, i8) -> i8 { @@ -309,14 +321,16 @@ block0(v0: i64, v1: i64, v2: i8): return v3 } +; stmg %r11, %r15, 88(%r15) ; block0: -; lgr %r2, %r4 -; sllk %r4, %r3, 3 -; nill %r3, 65532 -; lcr %r5, %r4 -; l %r0, 0(%r3) -; 0: rll %r1, %r0, 0(%r4) ; rxsbg %r1, %r2, 32, 40, 24 ; rll %r1, %r1, 0(%r5) ; cs %r0, %r1, 0(%r3) ; jglh 0b ; 1: -; rll %r2, %r0, 8(%r4) +; sllk %r11, %r3, 3 +; lgr %r5, %r3 +; nill %r5, 65532 +; lcr %r2, %r11 +; l %r0, 0(%r5) +; 0: rll %r1, %r0, 0(%r11) ; rxsbg %r1, %r4, 32, 40, 24 ; rll %r1, %r1, 0(%r2) ; cs %r0, %r1, 0(%r5) ; jglh 0b ; 1: +; rll %r2, %r0, 8(%r11) +; lmg %r11, %r15, 88(%r15) ; br %r14 function %atomic_rmw_nand_i64(i64, i64, i64) -> i64 { @@ -350,12 +364,12 @@ block0(v0: i64, v1: i64, v2: i16): } ; block0: -; lgr %r5, %r4 -; sllk %r4, %r3, 3 -; nill %r3, 65532 -; l %r0, 0(%r3) -; 0: rll %r1, %r0, 0(%r4) ; rnsbg %r1, %r5, 32, 48, 16 ; xilf %r1, 4294901760 ; rll %r1, %r1, 0(%r4) ; cs %r0, %r1, 0(%r3) ; jglh 0b ; 1: -; rll %r2, %r0, 16(%r4) +; sllk %r2, %r3, 3 +; lgr %r5, %r3 +; nill %r5, 65532 +; l %r0, 0(%r5) +; 0: rll %r1, %r0, 0(%r2) ; rnsbg %r1, %r4, 32, 48, 16 ; xilf %r1, 4294901760 ; rll %r1, %r1, 0(%r2) ; cs %r0, %r1, 0(%r5) ; jglh 0b ; 1: +; rll %r2, %r0, 16(%r2) ; br %r14 function %atomic_rmw_nand_i8(i64, i64, i8) -> i8 { @@ -364,14 +378,16 @@ block0(v0: i64, v1: i64, v2: i8): return v3 } +; stmg %r11, %r15, 88(%r15) ; block0: -; lgr %r2, %r4 -; sllk %r4, %r3, 3 -; nill %r3, 65532 -; lcr %r5, %r4 -; l %r0, 0(%r3) -; 0: rll %r1, %r0, 0(%r4) ; rnsbg %r1, %r2, 32, 40, 24 ; xilf %r1, 4278190080 ; rll %r1, %r1, 0(%r5) ; cs %r0, %r1, 0(%r3) ; jglh 0b ; 1: -; rll %r2, %r0, 8(%r4) +; sllk %r11, %r3, 3 +; lgr %r5, %r3 +; nill %r5, 65532 +; lcr %r2, %r11 +; l %r0, 0(%r5) +; 0: rll %r1, %r0, 0(%r11) ; rnsbg %r1, %r4, 32, 40, 24 ; xilf %r1, 4278190080 ; rll %r1, %r1, 0(%r2) ; cs %r0, %r1, 0(%r5) ; jglh 0b ; 1: +; rll %r2, %r0, 8(%r11) +; lmg %r11, %r15, 88(%r15) ; br %r14 function %atomic_rmw_smin_i64(i64, i64, i64) -> i64 { @@ -405,13 +421,13 @@ block0(v0: i64, v1: i64, v2: i16): } ; block0: -; lgr %r5, %r4 -; sllk %r4, %r3, 3 -; nill %r3, 65532 -; sllk %r5, %r5, 16 -; l %r0, 0(%r3) -; 0: rll %r1, %r0, 0(%r4) ; cr %r5, %r1 ; jgnl 1f ; risbgn %r1, %r5, 32, 48, 0 ; rll %r1, %r1, 0(%r4) ; cs %r0, %r1, 0(%r3) ; jglh 0b ; 1: -; rll %r2, %r0, 16(%r4) +; sllk %r2, %r3, 3 +; lgr %r5, %r3 +; nill %r5, 65532 +; sllk %r4, %r4, 16 +; l %r0, 0(%r5) +; 0: rll %r1, %r0, 0(%r2) ; cr %r4, %r1 ; jgnl 1f ; risbgn %r1, %r4, 32, 48, 0 ; rll %r1, %r1, 0(%r2) ; cs %r0, %r1, 0(%r5) ; jglh 0b ; 1: +; rll %r2, %r0, 16(%r2) ; br %r14 function %atomic_rmw_smin_i8(i64, i64, i8) -> i8 { @@ -420,14 +436,17 @@ block0(v0: i64, v1: i64, v2: i8): return v3 } +; stmg %r11, %r15, 88(%r15) ; block0: -; sllk %r2, %r3, 3 -; nill %r3, 65532 -; sllk %r5, %r4, 24 -; lcr %r4, %r2 -; l %r0, 0(%r3) -; 0: rll %r1, %r0, 0(%r2) ; cr %r5, %r1 ; jgnl 1f ; risbgn %r1, %r5, 32, 40, 0 ; rll %r1, %r1, 0(%r4) ; cs %r0, %r1, 0(%r3) ; jglh 0b ; 1: -; rll %r2, %r0, 8(%r2) +; sllk %r11, %r3, 3 +; lgr %r5, %r3 +; nill %r5, 65532 +; sllk %r4, %r4, 24 +; lcr %r2, %r11 +; l %r0, 0(%r5) +; 0: rll %r1, %r0, 0(%r11) ; cr %r4, %r1 ; jgnl 1f ; risbgn %r1, %r4, 32, 40, 0 ; rll %r1, %r1, 0(%r2) ; cs %r0, %r1, 0(%r5) ; jglh 0b ; 1: +; rll %r2, %r0, 8(%r11) +; lmg %r11, %r15, 88(%r15) ; br %r14 function %atomic_rmw_smax_i64(i64, i64, i64) -> i64 { @@ -461,13 +480,13 @@ block0(v0: i64, v1: i64, v2: i16): } ; block0: -; lgr %r5, %r4 -; sllk %r4, %r3, 3 -; nill %r3, 65532 -; sllk %r5, %r5, 16 -; l %r0, 0(%r3) -; 0: rll %r1, %r0, 0(%r4) ; cr %r5, %r1 ; jgnh 1f ; risbgn %r1, %r5, 32, 48, 0 ; rll %r1, %r1, 0(%r4) ; cs %r0, %r1, 0(%r3) ; jglh 0b ; 1: -; rll %r2, %r0, 16(%r4) +; sllk %r2, %r3, 3 +; lgr %r5, %r3 +; nill %r5, 65532 +; sllk %r4, %r4, 16 +; l %r0, 0(%r5) +; 0: rll %r1, %r0, 0(%r2) ; cr %r4, %r1 ; jgnh 1f ; risbgn %r1, %r4, 32, 48, 0 ; rll %r1, %r1, 0(%r2) ; cs %r0, %r1, 0(%r5) ; jglh 0b ; 1: +; rll %r2, %r0, 16(%r2) ; br %r14 function %atomic_rmw_smax_i8(i64, i64, i8) -> i8 { @@ -476,14 +495,17 @@ block0(v0: i64, v1: i64, v2: i8): return v3 } +; stmg %r11, %r15, 88(%r15) ; block0: -; sllk %r2, %r3, 3 -; nill %r3, 65532 -; sllk %r5, %r4, 24 -; lcr %r4, %r2 -; l %r0, 0(%r3) -; 0: rll %r1, %r0, 0(%r2) ; cr %r5, %r1 ; jgnh 1f ; risbgn %r1, %r5, 32, 40, 0 ; rll %r1, %r1, 0(%r4) ; cs %r0, %r1, 0(%r3) ; jglh 0b ; 1: -; rll %r2, %r0, 8(%r2) +; sllk %r11, %r3, 3 +; lgr %r5, %r3 +; nill %r5, 65532 +; sllk %r4, %r4, 24 +; lcr %r2, %r11 +; l %r0, 0(%r5) +; 0: rll %r1, %r0, 0(%r11) ; cr %r4, %r1 ; jgnh 1f ; risbgn %r1, %r4, 32, 40, 0 ; rll %r1, %r1, 0(%r2) ; cs %r0, %r1, 0(%r5) ; jglh 0b ; 1: +; rll %r2, %r0, 8(%r11) +; lmg %r11, %r15, 88(%r15) ; br %r14 function %atomic_rmw_umin_i64(i64, i64, i64) -> i64 { @@ -517,13 +539,13 @@ block0(v0: i64, v1: i64, v2: i16): } ; block0: -; lgr %r5, %r4 -; sllk %r4, %r3, 3 -; nill %r3, 65532 -; sllk %r5, %r5, 16 -; l %r0, 0(%r3) -; 0: rll %r1, %r0, 0(%r4) ; clr %r5, %r1 ; jgnl 1f ; risbgn %r1, %r5, 32, 48, 0 ; rll %r1, %r1, 0(%r4) ; cs %r0, %r1, 0(%r3) ; jglh 0b ; 1: -; rll %r2, %r0, 16(%r4) +; sllk %r2, %r3, 3 +; lgr %r5, %r3 +; nill %r5, 65532 +; sllk %r4, %r4, 16 +; l %r0, 0(%r5) +; 0: rll %r1, %r0, 0(%r2) ; clr %r4, %r1 ; jgnl 1f ; risbgn %r1, %r4, 32, 48, 0 ; rll %r1, %r1, 0(%r2) ; cs %r0, %r1, 0(%r5) ; jglh 0b ; 1: +; rll %r2, %r0, 16(%r2) ; br %r14 function %atomic_rmw_umin_i8(i64, i64, i8) -> i8 { @@ -532,14 +554,17 @@ block0(v0: i64, v1: i64, v2: i8): return v3 } +; stmg %r11, %r15, 88(%r15) ; block0: -; sllk %r2, %r3, 3 -; nill %r3, 65532 -; sllk %r5, %r4, 24 -; lcr %r4, %r2 -; l %r0, 0(%r3) -; 0: rll %r1, %r0, 0(%r2) ; clr %r5, %r1 ; jgnl 1f ; risbgn %r1, %r5, 32, 40, 0 ; rll %r1, %r1, 0(%r4) ; cs %r0, %r1, 0(%r3) ; jglh 0b ; 1: -; rll %r2, %r0, 8(%r2) +; sllk %r11, %r3, 3 +; lgr %r5, %r3 +; nill %r5, 65532 +; sllk %r4, %r4, 24 +; lcr %r2, %r11 +; l %r0, 0(%r5) +; 0: rll %r1, %r0, 0(%r11) ; clr %r4, %r1 ; jgnl 1f ; risbgn %r1, %r4, 32, 40, 0 ; rll %r1, %r1, 0(%r2) ; cs %r0, %r1, 0(%r5) ; jglh 0b ; 1: +; rll %r2, %r0, 8(%r11) +; lmg %r11, %r15, 88(%r15) ; br %r14 function %atomic_rmw_umax_i64(i64, i64, i64) -> i64 { @@ -573,13 +598,13 @@ block0(v0: i64, v1: i64, v2: i16): } ; block0: -; lgr %r5, %r4 -; sllk %r4, %r3, 3 -; nill %r3, 65532 -; sllk %r5, %r5, 16 -; l %r0, 0(%r3) -; 0: rll %r1, %r0, 0(%r4) ; clr %r5, %r1 ; jgnh 1f ; risbgn %r1, %r5, 32, 48, 0 ; rll %r1, %r1, 0(%r4) ; cs %r0, %r1, 0(%r3) ; jglh 0b ; 1: -; rll %r2, %r0, 16(%r4) +; sllk %r2, %r3, 3 +; lgr %r5, %r3 +; nill %r5, 65532 +; sllk %r4, %r4, 16 +; l %r0, 0(%r5) +; 0: rll %r1, %r0, 0(%r2) ; clr %r4, %r1 ; jgnh 1f ; risbgn %r1, %r4, 32, 48, 0 ; rll %r1, %r1, 0(%r2) ; cs %r0, %r1, 0(%r5) ; jglh 0b ; 1: +; rll %r2, %r0, 16(%r2) ; br %r14 function %atomic_rmw_umax_i8(i64, i64, i8) -> i8 { @@ -588,13 +613,16 @@ block0(v0: i64, v1: i64, v2: i8): return v3 } +; stmg %r11, %r15, 88(%r15) ; block0: -; sllk %r2, %r3, 3 -; nill %r3, 65532 -; sllk %r5, %r4, 24 -; lcr %r4, %r2 -; l %r0, 0(%r3) -; 0: rll %r1, %r0, 0(%r2) ; clr %r5, %r1 ; jgnh 1f ; risbgn %r1, %r5, 32, 40, 0 ; rll %r1, %r1, 0(%r4) ; cs %r0, %r1, 0(%r3) ; jglh 0b ; 1: -; rll %r2, %r0, 8(%r2) +; sllk %r11, %r3, 3 +; lgr %r5, %r3 +; nill %r5, 65532 +; sllk %r4, %r4, 24 +; lcr %r2, %r11 +; l %r0, 0(%r5) +; 0: rll %r1, %r0, 0(%r11) ; clr %r4, %r1 ; jgnh 1f ; risbgn %r1, %r4, 32, 40, 0 ; rll %r1, %r1, 0(%r2) ; cs %r0, %r1, 0(%r5) ; jglh 0b ; 1: +; rll %r2, %r0, 8(%r11) +; lmg %r11, %r15, 88(%r15) ; br %r14 diff --git a/cranelift/filetests/filetests/isa/s390x/bitops.clif b/cranelift/filetests/filetests/isa/s390x/bitops.clif index 6ea451147d..2ea8f3b5a5 100644 --- a/cranelift/filetests/filetests/isa/s390x/bitops.clif +++ b/cranelift/filetests/filetests/isa/s390x/bitops.clif @@ -347,9 +347,10 @@ block0(v0: i32): } ; block0: -; oihl %r2, 1 -; lcgr %r3, %r2 -; ngrk %r5, %r2, %r3 +; lgr %r5, %r2 +; oihl %r5, 1 +; lcgr %r3, %r5 +; ngr %r5, %r3 ; flogr %r0, %r5 ; lhi %r4, 63 ; srk %r2, %r4, %r0 @@ -362,9 +363,10 @@ block0(v0: i16): } ; block0: -; oilh %r2, 1 -; lcgr %r3, %r2 -; ngrk %r5, %r2, %r3 +; lgr %r5, %r2 +; oilh %r5, 1 +; lcgr %r3, %r5 +; ngr %r5, %r3 ; flogr %r0, %r5 ; lhi %r4, 63 ; srk %r2, %r4, %r0 @@ -377,9 +379,10 @@ block0(v0: i8): } ; block0: -; oill %r2, 256 -; lcgr %r3, %r2 -; ngrk %r5, %r2, %r3 +; lgr %r5, %r2 +; oill %r5, 256 +; lcgr %r3, %r5 +; ngr %r5, %r3 ; flogr %r0, %r5 ; lhi %r4, 63 ; srk %r2, %r4, %r0 diff --git a/cranelift/filetests/filetests/isa/s390x/bitwise.clif b/cranelift/filetests/filetests/isa/s390x/bitwise.clif index affc376914..8ea3b5bbb0 100644 --- a/cranelift/filetests/filetests/isa/s390x/bitwise.clif +++ b/cranelift/filetests/filetests/isa/s390x/bitwise.clif @@ -597,8 +597,9 @@ block0(v0: i32, v1: i32, v2: i32): ; block0: ; nr %r3, %r2 -; xilf %r2, 4294967295 -; nr %r4, %r2 +; lgr %r5, %r2 +; xilf %r5, 4294967295 +; nr %r4, %r5 ; ork %r2, %r4, %r3 ; br %r14 @@ -610,8 +611,9 @@ block0(v0: i16, v1: i16, v2: i16): ; block0: ; nr %r3, %r2 -; xilf %r2, 4294967295 -; nr %r4, %r2 +; lgr %r5, %r2 +; xilf %r5, 4294967295 +; nr %r4, %r5 ; ork %r2, %r4, %r3 ; br %r14 @@ -623,8 +625,9 @@ block0(v0: i8, v1: i8, v2: i8): ; block0: ; nr %r3, %r2 -; xilf %r2, 4294967295 -; nr %r4, %r2 +; lgr %r5, %r2 +; xilf %r5, 4294967295 +; nr %r4, %r5 ; ork %r2, %r4, %r3 ; br %r14 diff --git a/cranelift/filetests/filetests/isa/s390x/call.clif b/cranelift/filetests/filetests/isa/s390x/call.clif index dbfe73b088..7dfd72f031 100644 --- a/cranelift/filetests/filetests/isa/s390x/call.clif +++ b/cranelift/filetests/filetests/isa/s390x/call.clif @@ -46,7 +46,8 @@ block0(v0: i32): } ; block0: -; llgfr %r2, %r2 +; lgr %r5, %r2 +; llgfr %r2, %r5 ; br %r14 function %call_uext(i32) -> i64 { @@ -73,7 +74,8 @@ block0(v0: i32): } ; block0: -; lgfr %r2, %r2 +; lgr %r5, %r2 +; lgfr %r2, %r5 ; br %r14 function %call_colocated(i64) -> i64 { diff --git a/cranelift/filetests/filetests/isa/s390x/shift-rotate.clif b/cranelift/filetests/filetests/isa/s390x/shift-rotate.clif index 4163806b99..57a118a7db 100644 --- a/cranelift/filetests/filetests/isa/s390x/shift-rotate.clif +++ b/cranelift/filetests/filetests/isa/s390x/shift-rotate.clif @@ -153,13 +153,14 @@ block0(v0: i16, v1: i16): } ; block0: +; lgr %r4, %r3 ; llhr %r2, %r2 -; lcr %r4, %r3 -; nill %r3, 15 +; lcr %r3, %r4 ; nill %r4, 15 -; sllk %r4, %r2, 0(%r4) -; srlk %r5, %r2, 0(%r3) -; ork %r2, %r4, %r5 +; nill %r3, 15 +; sllk %r3, %r2, 0(%r3) +; srlk %r4, %r2, 0(%r4) +; ork %r2, %r3, %r4 ; br %r14 function %rotr_i16_imm(i16) -> i16 { @@ -201,13 +202,14 @@ block0(v0: i8, v1: i8): } ; block0: +; lgr %r4, %r3 ; llcr %r2, %r2 -; lcr %r4, %r3 -; nill %r3, 7 +; lcr %r3, %r4 ; nill %r4, 7 -; sllk %r4, %r2, 0(%r4) -; srlk %r5, %r2, 0(%r3) -; ork %r2, %r4, %r5 +; nill %r3, 7 +; sllk %r3, %r2, 0(%r3) +; srlk %r4, %r2, 0(%r4) +; ork %r2, %r3, %r4 ; br %r14 function %rotr_i8_imm(i8) -> i8 { @@ -578,8 +580,9 @@ block0(v0: i16, v1: i16): ; block0: ; llhr %r2, %r2 -; nill %r3, 15 -; srlk %r2, %r2, 0(%r3) +; lgr %r4, %r3 +; nill %r4, 15 +; srlk %r2, %r2, 0(%r4) ; br %r14 function %ushr_i16_imm(i16) -> i16 { @@ -616,8 +619,9 @@ block0(v0: i8, v1: i8): ; block0: ; llcr %r2, %r2 -; nill %r3, 7 -; srlk %r2, %r2, 0(%r3) +; lgr %r4, %r3 +; nill %r4, 7 +; srlk %r2, %r2, 0(%r4) ; br %r14 function %ushr_i8_imm(i8) -> i8 { @@ -950,8 +954,9 @@ block0(v0: i16, v1: i16): ; block0: ; lhr %r2, %r2 -; nill %r3, 15 -; srak %r2, %r2, 0(%r3) +; lgr %r4, %r3 +; nill %r4, 15 +; srak %r2, %r2, 0(%r4) ; br %r14 function %sshr_i16_imm(i16) -> i16 { @@ -988,8 +993,9 @@ block0(v0: i8, v1: i8): ; block0: ; lbr %r2, %r2 -; nill %r3, 7 -; srak %r2, %r2, 0(%r3) +; lgr %r4, %r3 +; nill %r4, 7 +; srak %r2, %r2, 0(%r4) ; br %r14 function %sshr_i8_imm(i8) -> i8 { diff --git a/cranelift/filetests/filetests/isa/x64/amode-opt.clif b/cranelift/filetests/filetests/isa/x64/amode-opt.clif index 7c7b315955..72bb63d321 100644 --- a/cranelift/filetests/filetests/isa/x64/amode-opt.clif +++ b/cranelift/filetests/filetests/isa/x64/amode-opt.clif @@ -153,8 +153,9 @@ block0(v0: i64, v1: i32, v2: i32): ; pushq %rbp ; movq %rsp, %rbp ; block0: -; addl %esi, %edx, %esi -; movq -1(%rdi,%rsi,4), %rax +; movq %rsi, %r9 +; addl %r9d, %edx, %r9d +; movq -1(%rdi,%r9,4), %rax ; movq %rbp, %rsp ; popq %rbp ; ret diff --git a/cranelift/filetests/filetests/isa/x64/b1.clif b/cranelift/filetests/filetests/isa/x64/b1.clif index d790d72d94..65cf14c407 100644 --- a/cranelift/filetests/filetests/isa/x64/b1.clif +++ b/cranelift/filetests/filetests/isa/x64/b1.clif @@ -11,8 +11,8 @@ block0(v0: b1, v1: i32, v2: i32): ; movq %rsp, %rbp ; block0: ; testb $1, %dil -; cmovnzl %esi, %edx, %edx ; movq %rdx, %rax +; cmovnzl %esi, %eax, %eax ; movq %rbp, %rsp ; popq %rbp ; ret @@ -147,8 +147,8 @@ block0(v0: i64): ; pushq %rbp ; movq %rsp, %rbp ; block0: -; shrq $63, %rdi, %rdi ; movq %rdi, %rax +; shrq $63, %rax, %rax ; movq %rbp, %rsp ; popq %rbp ; ret @@ -163,8 +163,8 @@ block0(v0: i32): ; pushq %rbp ; movq %rsp, %rbp ; block0: -; shrl $31, %edi, %edi ; movq %rdi, %rax +; shrl $31, %eax, %eax ; movq %rbp, %rsp ; popq %rbp ; ret @@ -179,8 +179,8 @@ block0(v0: i64): ; pushq %rbp ; movq %rsp, %rbp ; block0: -; shrq $63, %rdi, %rdi ; movq %rdi, %rax +; shrq $63, %rax, %rax ; movq %rbp, %rsp ; popq %rbp ; ret @@ -195,8 +195,8 @@ block0(v0: i32): ; pushq %rbp ; movq %rsp, %rbp ; block0: -; shrl $31, %edi, %edi ; movq %rdi, %rax +; shrl $31, %eax, %eax ; movq %rbp, %rsp ; popq %rbp ; ret @@ -211,9 +211,9 @@ block0(v0: i64): ; pushq %rbp ; movq %rsp, %rbp ; block0: -; notq %rdi, %rdi -; shrq $63, %rdi, %rdi ; movq %rdi, %rax +; notq %rax, %rax +; shrq $63, %rax, %rax ; movq %rbp, %rsp ; popq %rbp ; ret @@ -228,9 +228,9 @@ block0(v0: i32): ; pushq %rbp ; movq %rsp, %rbp ; block0: -; notq %rdi, %rdi -; shrl $31, %edi, %edi ; movq %rdi, %rax +; notq %rax, %rax +; shrl $31, %eax, %eax ; movq %rbp, %rsp ; popq %rbp ; ret @@ -245,9 +245,9 @@ block0(v0: i64): ; pushq %rbp ; movq %rsp, %rbp ; block0: -; notq %rdi, %rdi -; shrq $63, %rdi, %rdi ; movq %rdi, %rax +; notq %rax, %rax +; shrq $63, %rax, %rax ; movq %rbp, %rsp ; popq %rbp ; ret @@ -262,9 +262,9 @@ block0(v0: i32): ; pushq %rbp ; movq %rsp, %rbp ; block0: -; notq %rdi, %rdi -; shrl $31, %edi, %edi ; movq %rdi, %rax +; notq %rax, %rax +; shrl $31, %eax, %eax ; movq %rbp, %rsp ; popq %rbp ; ret diff --git a/cranelift/filetests/filetests/isa/x64/basic.clif b/cranelift/filetests/filetests/isa/x64/basic.clif index ba779d4cf6..ad20bcc4f0 100644 --- a/cranelift/filetests/filetests/isa/x64/basic.clif +++ b/cranelift/filetests/filetests/isa/x64/basic.clif @@ -10,8 +10,8 @@ block0(v0: i32, v1: i32): ; pushq %rbp ; movq %rsp, %rbp ; block0: -; addl %edi, %esi, %edi ; movq %rdi, %rax +; addl %eax, %esi, %eax ; movq %rbp, %rsp ; popq %rbp ; ret diff --git a/cranelift/filetests/filetests/isa/x64/call-conv.clif b/cranelift/filetests/filetests/isa/x64/call-conv.clif index 6ceb2225e6..6586bacce9 100644 --- a/cranelift/filetests/filetests/isa/x64/call-conv.clif +++ b/cranelift/filetests/filetests/isa/x64/call-conv.clif @@ -12,11 +12,10 @@ block0(v0: i32): ; pushq %rbp ; movq %rsp, %rbp ; block0: +; movq %rdi, %rcx ; subq %rsp, $32, %rsp ; virtual_sp_offset_adjust 32 -; movq %rdi, %rcx -; movq %rcx, %rdi -; call *%rdi +; call *%rcx ; addq %rsp, $32, %rsp ; virtual_sp_offset_adjust -32 ; movq %rbp, %rsp @@ -36,20 +35,16 @@ block0(v0: i32, v1: f32): ; pushq %rbp ; movq %rsp, %rbp ; block0: -; movq %rdi, %r10 ; movdqa %xmm0, %xmm6 ; subq %rsp, $32, %rsp ; virtual_sp_offset_adjust 32 -; movq %r10, %rcx +; movq %rdi, %rcx ; movdqa %xmm6, %xmm1 -; movq %r10, %rdi -; movdqa %xmm1, %xmm6 ; call *%rdi ; addq %rsp, $32, %rsp ; virtual_sp_offset_adjust -32 -; movq %rdi, %r10 ; movdqa %xmm6, %xmm0 -; call *%r10 +; call *%rdi ; movq %rbp, %rsp ; popq %rbp ; ret @@ -130,22 +125,19 @@ block0( ; pushq %rbp ; movq %rsp, %rbp ; block0: -; movq %rdx, %r10 +; movq %rcx, %rax +; movq %rdx, %rcx ; movq %rsi, %rdx -; movq %r8, %rsi -; movq %r10, %r8 -; movq %r9, %rax -; movq %rcx, %r9 +; movq %rdi, %rsi +; movq %rax, %rdi ; movq 16(%rbp), %r11 ; movq 24(%rbp), %r10 ; movss 32(%rbp), %xmm9 ; movsd 40(%rbp), %xmm8 ; subq %rsp, $144, %rsp ; virtual_sp_offset_adjust 144 -; movq %rdi, %rcx -; movq %rsi, 32(%rsp) -; movq %rax, %rsi -; movq %rsi, 40(%rsp) +; movq %r8, 32(%rsp) +; movq %r9, 40(%rsp) ; movsd %xmm0, 48(%rsp) ; movsd %xmm1, 56(%rsp) ; movsd %xmm2, 64(%rsp) @@ -158,7 +150,10 @@ block0( ; movl %r10d, 120(%rsp) ; movss %xmm9, 128(%rsp) ; movsd %xmm8, 136(%rsp) -; call *%rdi +; movq %rdi, %r9 +; movq %rcx, %r8 +; movq %rsi, %rcx +; call *%rcx ; addq %rsp, $144, %rsp ; virtual_sp_offset_adjust -144 ; movq %rbp, %rsp @@ -180,16 +175,15 @@ block0(v0: i64, v1:i64, v2:i64, v3:i64, v4:i64): ; pushq %rbp ; movq %rsp, %rbp ; block0: -; movq %r8, %r10 -; movq %rdx, %r8 -; movq %rcx, %rax +; movq %rdx, %r11 +; movq %rcx, %r9 +; movq %rsi, %rdx +; movq %rdi, %rcx ; subq %rsp, $48, %rsp ; virtual_sp_offset_adjust 48 -; movq %rdi, %rcx -; movq %rsi, %rdx -; movq %rax, %r9 -; movq %r10, 32(%rsp) -; call *%rdi +; movq %r8, 32(%rsp) +; movq %r11, %r8 +; call *%rcx ; addq %rsp, $48, %rsp ; virtual_sp_offset_adjust -48 ; movq %rbp, %rsp @@ -206,24 +200,23 @@ block0(v0: i32, v1: f32, v2: i64, v3: f64, v4: i32, v5: i32, v6: i32, v7: f32, v ; pushq %rbp ; movq %rsp, %rbp ; block0: -; movq %rcx, %r10 -; movq %r8, %r9 -; movdqa %xmm1, %xmm6 -; movdqa %xmm3, %xmm8 +; movq %rsi, %r9 +; movq %rdi, %rsi +; movdqa %xmm1, %xmm12 +; movdqa %xmm0, %xmm1 ; subq %rsp, $96, %rsp ; virtual_sp_offset_adjust 96 -; movq %rdi, %rcx -; movdqa %xmm0, %xmm1 -; movq %rsi, %r8 -; movdqa %xmm6, %xmm3 ; movl %edx, 32(%rsp) -; movl %r10d, 40(%rsp) -; movl %r9d, 48(%rsp) +; movl %ecx, 40(%rsp) +; movl %r8d, 48(%rsp) ; movss %xmm2, 56(%rsp) -; movsd %xmm8, 64(%rsp) +; movsd %xmm3, 64(%rsp) ; movss %xmm4, 72(%rsp) ; movsd %xmm5, 80(%rsp) -; call *%rdi +; movq %rsi, %rcx +; movq %r9, %r8 +; movdqa %xmm12, %xmm3 +; call *%rcx ; addq %rsp, $96, %rsp ; virtual_sp_offset_adjust -96 ; movq %rbp, %rsp @@ -240,8 +233,7 @@ block0(v0: i32): ; pushq %rbp ; movq %rsp, %rbp ; block0: -; movq %rdi, %rcx -; call *%rcx +; call *%rdi ; movq %rbp, %rsp ; popq %rbp ; ret @@ -256,8 +248,7 @@ block0(v0: i32): ; pushq %rbp ; movq %rsp, %rbp ; block0: -; movq %rdi, %rcx -; call *%rcx +; call *%rdi ; movq %rbp, %rsp ; popq %rbp ; ret @@ -301,8 +292,9 @@ block0: ; movq %rdi, %r13 ; movl $1, %edx ; call *%rdx +; movq %rdx, %r9 ; movq %r13, %rdi -; movl %edx, 0(%rdi) +; movl %r9d, 0(%rdi) ; movq 0(%rsp), %r13 ; addq %rsp, $16, %rsp ; movq %rbp, %rsp @@ -328,12 +320,13 @@ block0: ; virtual_sp_offset_adjust 16 ; lea 0(%rsp), %rdi ; call *%r8 -; movq 0(%rsp), %r11 +; movq %rdx, %rcx +; movq 0(%rsp), %rdx ; addq %rsp, $16, %rsp ; virtual_sp_offset_adjust -16 ; movq %rbx, %rdi -; movq %rdx, 0(%rdi) -; movl %r11d, 8(%rdi) +; movq %rcx, 0(%rdi) +; movl %edx, 8(%rdi) ; movq 0(%rsp), %rbx ; addq %rsp, $16, %rsp ; movq %rbp, %rsp @@ -356,10 +349,13 @@ block0: ; movq %rdi, %r12 ; movl $1, %r9d ; call *%r9 +; movq %rax, %r9 +; movq %rdx, %r11 +; movdqa %xmm1, %xmm10 ; movq %r12, %rdi -; movq %rax, 0(%rdi) -; movl %edx, 8(%rdi) -; movss %xmm1, 12(%rdi) +; movq %r9, 0(%rdi) +; movl %r11d, 8(%rdi) +; movss %xmm10, 12(%rdi) ; movq 0(%rsp), %r12 ; addq %rsp, $16, %rsp ; movq %rbp, %rsp @@ -377,16 +373,19 @@ block0(v0: f32, v1: i64, v2: i32, v3: f32): ; pushq %rbp ; movq %rsp, %rbp ; subq %rsp, $16, %rsp -; movq %rbx, 0(%rsp) +; movq %r13, 0(%rsp) ; block0: -; movq %rdx, %rbx +; movq %rdx, %r13 ; movl $1, %eax ; call *%rax -; movq %rbx, %rcx -; movq %rax, 0(%rcx) -; movl %edx, 8(%rcx) -; movss %xmm1, 12(%rcx) -; movq 0(%rsp), %rbx +; movq %rax, %rdi +; movq %rdx, %rcx +; movdqa %xmm1, %xmm14 +; movq %r13, %rdx +; movq %rdi, 0(%rdx) +; movl %ecx, 8(%rdx) +; movss %xmm14, 12(%rdx) +; movq 0(%rsp), %r13 ; addq %rsp, $16, %rsp ; movq %rbp, %rsp ; popq %rbp diff --git a/cranelift/filetests/filetests/isa/x64/ceil-libcall.clif b/cranelift/filetests/filetests/isa/x64/ceil-libcall.clif index f24acddcd4..2041ba21a9 100644 --- a/cranelift/filetests/filetests/isa/x64/ceil-libcall.clif +++ b/cranelift/filetests/filetests/isa/x64/ceil-libcall.clif @@ -10,8 +10,8 @@ block0(v0: f32): ; pushq %rbp ; movq %rsp, %rbp ; block0: -; load_ext_name %CeilF32+0, %r8 -; call *%r8 +; load_ext_name %CeilF32+0, %rdx +; call *%rdx ; movq %rbp, %rsp ; popq %rbp ; ret @@ -25,8 +25,8 @@ block0(v0: f64): ; pushq %rbp ; movq %rsp, %rbp ; block0: -; load_ext_name %CeilF64+0, %r8 -; call *%r8 +; load_ext_name %CeilF64+0, %rdx +; call *%rdx ; movq %rbp, %rsp ; popq %rbp ; ret diff --git a/cranelift/filetests/filetests/isa/x64/cmp-mem-bug.clif b/cranelift/filetests/filetests/isa/x64/cmp-mem-bug.clif index e07ea5ba36..0553a26d0a 100644 --- a/cranelift/filetests/filetests/isa/x64/cmp-mem-bug.clif +++ b/cranelift/filetests/filetests/isa/x64/cmp-mem-bug.clif @@ -18,8 +18,8 @@ block0(v0: i64, v1: i64): ; setz %al ; andq %rax, $1, %rax ; cmpq %r11, %rdi -; cmovzq %rdi, %rsi, %rsi ; movq %rsi, %rdx +; cmovzq %rdi, %rdx, %rdx ; movq %rbp, %rsp ; popq %rbp ; ret diff --git a/cranelift/filetests/filetests/isa/x64/div-checks.clif b/cranelift/filetests/filetests/isa/x64/div-checks.clif index af717f8e6c..573e9794ff 100644 --- a/cranelift/filetests/filetests/isa/x64/div-checks.clif +++ b/cranelift/filetests/filetests/isa/x64/div-checks.clif @@ -17,9 +17,8 @@ block0(v0: i8, v1: i8): ; pushq %rbp ; movq %rsp, %rbp ; block0: -; xorl %r10d, %r10d, %r10d ; movq %rdi, %rax -; movq %r10, %rdx +; xorl %edx, %edx, %edx ; srem_seq %al, %dl, %sil, %al, %dl, tmp=(none) ; shrq $8, %rax, %rax ; movq %rbp, %rsp @@ -36,9 +35,8 @@ block0(v0: i16, v1: i16): ; pushq %rbp ; movq %rsp, %rbp ; block0: -; xorl %r10d, %r10d, %r10d ; movq %rdi, %rax -; movq %r10, %rdx +; xorl %edx, %edx, %edx ; srem_seq %ax, %dx, %si, %ax, %dx, tmp=(none) ; movq %rdx, %rax ; movq %rbp, %rsp @@ -55,9 +53,8 @@ block0(v0: i32, v1: i32): ; pushq %rbp ; movq %rsp, %rbp ; block0: -; xorl %r10d, %r10d, %r10d ; movq %rdi, %rax -; movq %r10, %rdx +; xorl %edx, %edx, %edx ; srem_seq %eax, %edx, %esi, %eax, %edx, tmp=(none) ; movq %rdx, %rax ; movq %rbp, %rsp @@ -74,9 +71,8 @@ block0(v0: i64, v1: i64): ; pushq %rbp ; movq %rsp, %rbp ; block0: -; xorl %r10d, %r10d, %r10d ; movq %rdi, %rax -; movq %r10, %rdx +; xorl %edx, %edx, %edx ; srem_seq %rax, %rdx, %rsi, %rax, %rdx, tmp=(none) ; movq %rdx, %rax ; movq %rbp, %rsp diff --git a/cranelift/filetests/filetests/isa/x64/fastcall.clif b/cranelift/filetests/filetests/isa/x64/fastcall.clif index addc1118c3..efe8a5f2c6 100644 --- a/cranelift/filetests/filetests/isa/x64/fastcall.clif +++ b/cranelift/filetests/filetests/isa/x64/fastcall.clif @@ -149,20 +149,25 @@ block0(v0: i64): ; pushq %rbp ; unwind PushFrameRegs { offset_upward_to_caller_sp: 16 } ; movq %rsp, %rbp -; unwind DefineNewFrame { offset_upward_to_caller_sp: 16, offset_downward_to_clobbers: 0 } +; unwind DefineNewFrame { offset_upward_to_caller_sp: 16, offset_downward_to_clobbers: 16 } +; subq %rsp, $16, %rsp +; movq %rsi, 0(%rsp) +; unwind SaveReg { clobber_offset: 0, reg: p6i } ; block0: -; cvtsi2sd %rcx, %xmm2 +; cvtsi2sd %rcx, %xmm3 ; subq %rsp, $48, %rsp ; virtual_sp_offset_adjust 48 +; movq %rcx, 32(%rsp) +; movq %rcx, 40(%rsp) ; movq %rcx, %rdx -; movq %rdx, %r8 -; movdqa %xmm2, %xmm3 -; movq %r8, 32(%rsp) -; movq %r8, 40(%rsp) -; load_ext_name %g+0, %r8 -; call *%r8 +; load_ext_name %g+0, %rsi +; movq %rdx, %rcx +; movdqa %xmm3, %xmm2 +; call *%rsi ; addq %rsp, $48, %rsp ; virtual_sp_offset_adjust -48 +; movq 0(%rsp), %rsi +; addq %rsp, $16, %rsp ; movq %rbp, %rsp ; popq %rbp ; ret diff --git a/cranelift/filetests/filetests/isa/x64/fcvt.clif b/cranelift/filetests/filetests/isa/x64/fcvt.clif index 5f5e8b8488..88725b01da 100644 --- a/cranelift/filetests/filetests/isa/x64/fcvt.clif +++ b/cranelift/filetests/filetests/isa/x64/fcvt.clif @@ -190,10 +190,11 @@ block0(v0: i32x4): ; movdqa %xmm0, %xmm4 ; pslld %xmm4, $16, %xmm4 ; psrld %xmm4, $16, %xmm4 -; psubd %xmm0, %xmm4, %xmm0 +; movdqa %xmm0, %xmm10 +; psubd %xmm10, %xmm4, %xmm10 ; cvtdq2ps %xmm4, %xmm9 -; psrld %xmm0, $1, %xmm0 -; cvtdq2ps %xmm0, %xmm0 +; psrld %xmm10, $1, %xmm10 +; cvtdq2ps %xmm10, %xmm0 ; addps %xmm0, %xmm0, %xmm0 ; addps %xmm0, %xmm9, %xmm0 ; movq %rbp, %rsp @@ -434,14 +435,15 @@ block0(v0: f32x4): ; movq %rsp, %rbp ; block0: ; pxor %xmm3, %xmm3, %xmm3 -; maxps %xmm0, %xmm3, %xmm0 +; movdqa %xmm0, %xmm10 +; maxps %xmm10, %xmm3, %xmm10 ; pcmpeqd %xmm8, %xmm8, %xmm8 ; psrld %xmm8, $1, %xmm8 ; cvtdq2ps %xmm8, %xmm14 -; cvttps2dq %xmm0, %xmm13 -; subps %xmm0, %xmm14, %xmm0 -; cmpps $2, %xmm14, %xmm0, %xmm14 -; cvttps2dq %xmm0, %xmm0 +; cvttps2dq %xmm10, %xmm13 +; subps %xmm10, %xmm14, %xmm10 +; cmpps $2, %xmm14, %xmm10, %xmm14 +; cvttps2dq %xmm10, %xmm0 ; pxor %xmm0, %xmm14, %xmm0 ; pxor %xmm7, %xmm7, %xmm7 ; pmaxsd %xmm0, %xmm7, %xmm0 @@ -461,9 +463,10 @@ block0(v0: f32x4): ; block0: ; movdqa %xmm0, %xmm5 ; cmpps $0, %xmm5, %xmm0, %xmm5 -; andps %xmm0, %xmm5, %xmm0 -; pxor %xmm5, %xmm0, %xmm5 -; cvttps2dq %xmm0, %xmm9 +; movdqa %xmm0, %xmm6 +; andps %xmm6, %xmm5, %xmm6 +; pxor %xmm5, %xmm6, %xmm5 +; cvttps2dq %xmm6, %xmm9 ; movdqa %xmm9, %xmm0 ; pand %xmm0, %xmm5, %xmm0 ; psrad %xmm0, $31, %xmm0 diff --git a/cranelift/filetests/filetests/isa/x64/floor-libcall.clif b/cranelift/filetests/filetests/isa/x64/floor-libcall.clif index f965c6687c..745426d716 100644 --- a/cranelift/filetests/filetests/isa/x64/floor-libcall.clif +++ b/cranelift/filetests/filetests/isa/x64/floor-libcall.clif @@ -10,8 +10,8 @@ block0(v0: f32): ; pushq %rbp ; movq %rsp, %rbp ; block0: -; load_ext_name %FloorF32+0, %r8 -; call *%r8 +; load_ext_name %FloorF32+0, %rdx +; call *%rdx ; movq %rbp, %rsp ; popq %rbp ; ret @@ -25,8 +25,8 @@ block0(v0: f64): ; pushq %rbp ; movq %rsp, %rbp ; block0: -; load_ext_name %FloorF64+0, %r8 -; call *%r8 +; load_ext_name %FloorF64+0, %rdx +; call *%rdx ; movq %rbp, %rsp ; popq %rbp ; ret diff --git a/cranelift/filetests/filetests/isa/x64/fma-call.clif b/cranelift/filetests/filetests/isa/x64/fma-call.clif index ba57d01344..9bc580f656 100644 --- a/cranelift/filetests/filetests/isa/x64/fma-call.clif +++ b/cranelift/filetests/filetests/isa/x64/fma-call.clif @@ -10,8 +10,8 @@ block0(v0: f32, v1: f32, v2: f32): ; pushq %rbp ; movq %rsp, %rbp ; block0: -; load_ext_name %FmaF32+0, %rsi -; call *%rsi +; load_ext_name %FmaF32+0, %r9 +; call *%r9 ; movq %rbp, %rsp ; popq %rbp ; ret @@ -25,8 +25,8 @@ block0(v0: f64, v1: f64, v2: f64): ; pushq %rbp ; movq %rsp, %rbp ; block0: -; load_ext_name %FmaF64+0, %rsi -; call *%rsi +; load_ext_name %FmaF64+0, %r9 +; call *%r9 ; movq %rbp, %rsp ; popq %rbp ; ret diff --git a/cranelift/filetests/filetests/isa/x64/i128.clif b/cranelift/filetests/filetests/isa/x64/i128.clif index df5a7f597c..504cb1cc40 100644 --- a/cranelift/filetests/filetests/isa/x64/i128.clif +++ b/cranelift/filetests/filetests/isa/x64/i128.clif @@ -11,10 +11,10 @@ block0(v0: i128, v1: i128): ; pushq %rbp ; movq %rsp, %rbp ; block0: -; addq %rdi, %rdx, %rdi -; adcq %rsi, %rcx, %rsi ; movq %rdi, %rax +; addq %rax, %rdx, %rax ; movq %rsi, %rdx +; adcq %rdx, %rcx, %rdx ; movq %rbp, %rsp ; popq %rbp ; ret @@ -28,10 +28,10 @@ block0(v0: i128, v1: i128): ; pushq %rbp ; movq %rsp, %rbp ; block0: -; subq %rdi, %rdx, %rdi -; sbbq %rsi, %rcx, %rsi ; movq %rdi, %rax +; subq %rax, %rdx, %rax ; movq %rsi, %rdx +; sbbq %rdx, %rcx, %rdx ; movq %rbp, %rsp ; popq %rbp ; ret @@ -45,10 +45,10 @@ block0(v0: i128, v1: i128): ; pushq %rbp ; movq %rsp, %rbp ; block0: -; andq %rdi, %rdx, %rdi -; andq %rsi, %rcx, %rsi ; movq %rdi, %rax +; andq %rax, %rdx, %rax ; movq %rsi, %rdx +; andq %rdx, %rcx, %rdx ; movq %rbp, %rsp ; popq %rbp ; ret @@ -62,10 +62,10 @@ block0(v0: i128, v1: i128): ; pushq %rbp ; movq %rsp, %rbp ; block0: -; orq %rdi, %rdx, %rdi -; orq %rsi, %rcx, %rsi ; movq %rdi, %rax +; orq %rax, %rdx, %rax ; movq %rsi, %rdx +; orq %rdx, %rcx, %rdx ; movq %rbp, %rsp ; popq %rbp ; ret @@ -79,10 +79,10 @@ block0(v0: i128, v1: i128): ; pushq %rbp ; movq %rsp, %rbp ; block0: -; xorq %rdi, %rdx, %rdi -; xorq %rsi, %rcx, %rsi ; movq %rdi, %rax +; xorq %rax, %rdx, %rax ; movq %rsi, %rdx +; xorq %rdx, %rcx, %rdx ; movq %rbp, %rsp ; popq %rbp ; ret @@ -96,10 +96,10 @@ block0(v0: i128): ; pushq %rbp ; movq %rsp, %rbp ; block0: -; notq %rdi, %rdi -; notq %rsi, %rsi ; movq %rdi, %rax +; notq %rax, %rax ; movq %rsi, %rdx +; notq %rdx, %rdx ; movq %rbp, %rsp ; popq %rbp ; ret @@ -115,11 +115,11 @@ block0(v0: i128, v1: i128): ; block0: ; movq %rdi, %r8 ; imulq %r8, %rcx, %r8 +; movq %rdi, %rax ; imulq %rsi, %rdx, %rsi ; movq %r8, %r9 ; addq %r9, %rsi, %r9 ; movq %r9, %r8 -; movq %rdi, %rax ; mul %rax, %rdx, %rax, %rdx ; movq %r8, %rdi ; addq %rdi, %rdx, %rdi @@ -466,9 +466,9 @@ block0(v0: b1): ; pushq %rbp ; movq %rsp, %rbp ; block0: -; andq %rdi, $1, %rdi -; xorq %rdx, %rdx, %rdx ; movq %rdi, %rax +; andq %rax, $1, %rax +; xorq %rdx, %rdx, %rdx ; movq %rbp, %rsp ; popq %rbp ; ret @@ -486,16 +486,17 @@ block0(v0: i128): ; shrq $1, %rdx, %rdx ; movabsq $8608480567731124087, %r10 ; andq %rdx, %r10, %rdx -; subq %rdi, %rdx, %rdi +; movq %rdi, %r11 +; subq %r11, %rdx, %r11 ; shrq $1, %rdx, %rdx ; andq %rdx, %r10, %rdx -; subq %rdi, %rdx, %rdi +; subq %r11, %rdx, %r11 ; shrq $1, %rdx, %rdx ; andq %rdx, %r10, %rdx -; subq %rdi, %rdx, %rdi -; movq %rdi, %rax +; subq %r11, %rdx, %r11 +; movq %r11, %rax ; shrq $4, %rax, %rax -; addq %rax, %rdi, %rax +; addq %rax, %r11, %rax ; movabsq $1085102592571150095, %rcx ; andq %rax, %rcx, %rax ; movabsq $72340172838076673, %r9 @@ -505,16 +506,17 @@ block0(v0: i128): ; shrq $1, %rcx, %rcx ; movabsq $8608480567731124087, %r8 ; andq %rcx, %r8, %rcx -; subq %rsi, %rcx, %rsi +; movq %rsi, %r9 +; subq %r9, %rcx, %r9 ; shrq $1, %rcx, %rcx ; andq %rcx, %r8, %rcx -; subq %rsi, %rcx, %rsi +; subq %r9, %rcx, %r9 ; shrq $1, %rcx, %rcx ; andq %rcx, %r8, %rcx -; subq %rsi, %rcx, %rsi -; movq %rsi, %rcx +; subq %r9, %rcx, %r9 +; movq %r9, %rcx ; shrq $4, %rcx, %rcx -; addq %rcx, %rsi, %rcx +; addq %rcx, %r9, %rcx ; movabsq $1085102592571150095, %rsi ; andq %rcx, %rsi, %rcx ; movabsq $72340172838076673, %rdx @@ -538,10 +540,11 @@ block0(v0: i128): ; movabsq $6148914691236517205, %r8 ; movq %rsi, %r9 ; andq %r9, %r8, %r9 -; shrq $1, %rsi, %rsi -; andq %rsi, %r8, %rsi +; movq %rsi, %rax +; shrq $1, %rax, %rax +; andq %rax, %r8, %rax ; shlq $1, %r9, %r9 -; orq %r9, %rsi, %r9 +; orq %r9, %rax, %r9 ; movabsq $3689348814741910323, %r11 ; movq %r9, %rsi ; andq %rsi, %r11, %rsi @@ -579,10 +582,11 @@ block0(v0: i128): ; movabsq $6148914691236517205, %rcx ; movq %rdi, %rdx ; andq %rdx, %rcx, %rdx -; shrq $1, %rdi, %rdi -; andq %rdi, %rcx, %rdi +; movq %rdi, %r11 +; shrq $1, %r11, %r11 +; andq %r11, %rcx, %r11 ; shlq $1, %rdx, %rdx -; orq %rdx, %rdi, %rdx +; orq %rdx, %r11, %rdx ; movabsq $3689348814741910323, %r9 ; movq %rdx, %r10 ; andq %r10, %r9, %r10 @@ -708,32 +712,42 @@ block0(v0: i128, v1: i128, v2: i64, v3: i128, v4: i128, v5: i128): ; pushq %rbp ; movq %rsp, %rbp -; subq %rsp, $16, %rsp +; subq %rsp, $32, %rsp ; movq %rbx, 0(%rsp) -; movq %r13, 8(%rsp) +; movq %r12, 8(%rsp) +; movq %r13, 16(%rsp) ; block0: +; movq %r9, %r13 +; movq %rcx, %rax +; movq %r8, %rcx +; movq %rax, %r8 ; movq 16(%rbp), %rbx ; movq 24(%rbp), %rax -; movq 32(%rbp), %r10 -; movq %r10, %r13 -; movq 40(%rbp), %r11 -; movq 48(%rbp), %r10 -; addq %rdi, %rdx, %rdi -; movq %rcx, %rdx -; adcq %rsi, %rdx, %rsi +; movq 32(%rbp), %r9 +; movq %r9, %r12 +; movq 40(%rbp), %r10 +; movq 48(%rbp), %r11 +; movq %rdi, %r9 +; addq %r9, %rdx, %r9 +; movq %r8, %rdi +; movq %rsi, %r8 +; adcq %r8, %rdi, %r8 ; xorq %rdx, %rdx, %rdx -; addq %r9, %r8, %r9 +; movq %rcx, %rsi +; movq %r13, %rdi +; addq %rdi, %rsi, %rdi ; adcq %rbx, %rdx, %rbx -; addq %rax, %r11, %rax -; movq %r13, %rdx -; adcq %rdx, %r10, %rdx -; addq %rdi, %r9, %rdi -; adcq %rsi, %rbx, %rsi -; addq %rax, %rdi, %rax -; adcq %rdx, %rsi, %rdx +; addq %rax, %r10, %rax +; movq %r12, %rdx +; adcq %rdx, %r11, %rdx +; addq %r9, %rdi, %r9 +; adcq %r8, %rbx, %r8 +; addq %rax, %r9, %rax +; adcq %rdx, %r8, %rdx ; movq 0(%rsp), %rbx -; movq 8(%rsp), %r13 -; addq %rsp, $16, %rsp +; movq 8(%rsp), %r12 +; movq 16(%rsp), %r13 +; addq %rsp, $32, %rsp ; movq %rbp, %rsp ; popq %rbp ; ret @@ -746,37 +760,41 @@ block0(v0: i128): ; pushq %rbp ; movq %rsp, %rbp -; subq %rsp, $32, %rsp +; subq %rsp, $48, %rsp ; movq %rbx, 0(%rsp) ; movq %r12, 8(%rsp) -; movq %r14, 16(%rsp) -; movq %r15, 24(%rsp) +; movq %r13, 16(%rsp) +; movq %r14, 24(%rsp) +; movq %r15, 32(%rsp) ; block0: ; movq %rdx, %r12 ; movq %rdi, %rax ; movq %rsi, %rdx ; movq %rdi, %r14 ; movq %rsi, %rbx +; movq %rdi, %r13 +; movq %rsi, %r15 ; movq %rdi, %r11 -; movq %rsi, %r9 ; movq %rdi, %r10 -; movq %rdi, %r8 ; movq %rsi, %rcx -; movq %r12, %r15 -; movq %r14, 0(%r15) -; movq %rbx, 8(%r15) -; movq %r11, 16(%r15) -; movq %r9, 24(%r15) -; movq %r10, 32(%r15) -; movq %r8, 40(%r15) -; movq %rcx, 48(%r15) -; movq %rdi, 56(%r15) -; movq %rsi, 64(%r15) +; movq %rdi, %r8 +; movq %rsi, %r9 +; movq %r12, %rdi +; movq %r14, 0(%rdi) +; movq %rbx, 8(%rdi) +; movq %r13, 16(%rdi) +; movq %r15, 24(%rdi) +; movq %r11, 32(%rdi) +; movq %r10, 40(%rdi) +; movq %rcx, 48(%rdi) +; movq %r8, 56(%rdi) +; movq %r9, 64(%rdi) ; movq 0(%rsp), %rbx ; movq 8(%rsp), %r12 -; movq 16(%rsp), %r14 -; movq 24(%rsp), %r15 -; addq %rsp, $32, %rsp +; movq 16(%rsp), %r13 +; movq 24(%rsp), %r14 +; movq 32(%rsp), %r15 +; addq %rsp, $48, %rsp ; movq %rbp, %rsp ; popq %rbp ; ret @@ -873,8 +891,8 @@ block0(v0: i8, v1: i128): ; block0: ; movq %rsi, %rcx ; andq %rcx, $7, %rcx -; shlb %cl, %dil, %dil ; movq %rdi, %rax +; shlb %cl, %al, %al ; movq %rbp, %rsp ; popq %rbp ; ret @@ -891,19 +909,21 @@ block0(v0: i128, v1: i128): ; movq %rdx, %rcx ; movq %rdi, %rdx ; shlq %cl, %rdx, %rdx -; shlq %cl, %rsi, %rsi -; movq %rcx, %rax +; movq %rsi, %r8 +; shlq %cl, %r8, %r8 +; movq %rcx, %rsi ; movl $64, %ecx -; movq %rax, %r8 -; subq %rcx, %r8, %rcx -; shrq %cl, %rdi, %rdi +; movq %rsi, %r9 +; subq %rcx, %r9, %rcx +; movq %rdi, %rsi +; shrq %cl, %rsi, %rsi ; xorq %rax, %rax, %rax -; testq $127, %r8 -; cmovzq %rax, %rdi, %rdi -; orq %rdi, %rsi, %rdi -; testq $64, %r8 +; testq $127, %r9 +; cmovzq %rax, %rsi, %rsi +; orq %rsi, %r8, %rsi +; testq $64, %r9 ; cmovzq %rdx, %rax, %rax -; cmovzq %rdi, %rdx, %rdx +; cmovzq %rsi, %rdx, %rdx ; movq %rbp, %rsp ; popq %rbp ; ret @@ -918,21 +938,23 @@ block0(v0: i128, v1: i128): ; movq %rsp, %rbp ; block0: ; movq %rdx, %rcx -; shrq %cl, %rdi, %rdi +; movq %rdi, %r10 +; shrq %cl, %r10, %r10 ; movq %rsi, %r8 ; shrq %cl, %r8, %r8 ; movl $64, %ecx ; movq %rdx, %r9 ; subq %rcx, %r9, %rcx -; shlq %cl, %rsi, %rsi +; movq %rsi, %rdi +; shlq %cl, %rdi, %rdi ; xorq %r11, %r11, %r11 ; testq $127, %r9 -; cmovzq %r11, %rsi, %rsi -; orq %rsi, %rdi, %rsi +; cmovzq %r11, %rdi, %rdi +; orq %rdi, %r10, %rdi ; xorq %rdx, %rdx, %rdx ; testq $64, %r9 ; movq %r8, %rax -; cmovzq %rsi, %rax, %rax +; cmovzq %rdi, %rax, %rax ; cmovzq %r8, %rdx, %rdx ; movq %rbp, %rsp ; popq %rbp @@ -948,25 +970,25 @@ block0(v0: i128, v1: i128): ; movq %rsp, %rbp ; block0: ; movq %rdx, %rcx -; shrq %cl, %rdi, %rdi -; movq %rsi, %rdx -; sarq %cl, %rdx, %rdx -; movq %rcx, %rax +; movq %rdi, %r8 +; shrq %cl, %r8, %r8 +; movq %rsi, %rdi +; sarq %cl, %rdi, %rdi ; movl $64, %ecx -; movq %rax, %r8 -; subq %rcx, %r8, %rcx +; movq %rdx, %r9 +; subq %rcx, %r9, %rcx ; movq %rsi, %r11 ; shlq %cl, %r11, %r11 ; xorq %rax, %rax, %rax -; testq $127, %r8 +; testq $127, %r9 ; cmovzq %rax, %r11, %r11 -; orq %rdi, %r11, %rdi -; sarq $63, %rsi, %rsi -; testq $64, %r8 -; movq %rdx, %rax -; cmovzq %rdi, %rax, %rax -; cmovzq %rdx, %rsi, %rsi +; orq %r8, %r11, %r8 ; movq %rsi, %rdx +; sarq $63, %rdx, %rdx +; testq $64, %r9 +; movq %rdi, %rax +; cmovzq %r8, %rax, %rax +; cmovzq %rdi, %rdx, %rdx ; movq %rbp, %rsp ; popq %rbp ; ret @@ -1000,19 +1022,20 @@ block0(v0: i128, v1: i128): ; movl $128, %ecx ; movq %r10, %r8 ; subq %rcx, %r8, %rcx -; shrq %cl, %rdi, %rdi +; movq %rdi, %r9 +; shrq %cl, %r9, %r9 ; movq %rsi, %r11 ; shrq %cl, %r11, %r11 -; movq %rcx, %r8 +; movq %rcx, %rdi ; movl $64, %ecx -; subq %rcx, %r8, %rcx +; subq %rcx, %rdi, %rcx ; shlq %cl, %rsi, %rsi ; xorq %r10, %r10, %r10 -; testq $127, %r8 +; testq $127, %rdi ; cmovzq %r10, %rsi, %rsi -; orq %rsi, %rdi, %rsi +; orq %rsi, %r9, %rsi ; xorq %r10, %r10, %r10 -; testq $64, %r8 +; testq $64, %rdi ; movq %r11, %rdi ; cmovzq %rsi, %rdi, %rdi ; cmovzq %r11, %r10, %r10 @@ -1057,17 +1080,17 @@ block0(v0: i128, v1: i128): ; subq %rcx, %r8, %rcx ; movq %rdi, %r11 ; shlq %cl, %r11, %r11 -; shlq %cl, %rsi, %rsi -; movq %rcx, %r8 +; movq %rsi, %r9 +; shlq %cl, %r9, %r9 +; movq %rcx, %rsi ; movl $64, %ecx -; movq %r8, %r9 -; subq %rcx, %r9, %rcx +; subq %rcx, %rsi, %rcx ; shrq %cl, %rdi, %rdi ; xorq %r8, %r8, %r8 -; testq $127, %r9 +; testq $127, %rsi ; cmovzq %r8, %rdi, %rdi -; orq %rdi, %rsi, %rdi -; testq $64, %r9 +; orq %rdi, %r9, %rdi +; testq $64, %rsi ; cmovzq %r11, %r8, %r8 ; cmovzq %rdi, %r11, %r11 ; orq %rax, %r8, %rax diff --git a/cranelift/filetests/filetests/isa/x64/ishl.clif b/cranelift/filetests/filetests/isa/x64/ishl.clif index 88532f8c5b..e171cf9774 100644 --- a/cranelift/filetests/filetests/isa/x64/ishl.clif +++ b/cranelift/filetests/filetests/isa/x64/ishl.clif @@ -20,19 +20,21 @@ block0(v0: i128, v1: i8): ; movzbq %dl, %rcx ; movq %rdi, %rdx ; shlq %cl, %rdx, %rdx -; shlq %cl, %rsi, %rsi +; movq %rsi, %r8 +; shlq %cl, %r8, %r8 ; movq %rcx, %r11 ; movl $64, %ecx -; movq %r11, %r8 -; subq %rcx, %r8, %rcx -; shrq %cl, %rdi, %rdi +; movq %r11, %r9 +; subq %rcx, %r9, %rcx +; movq %rdi, %rsi +; shrq %cl, %rsi, %rsi ; xorq %rax, %rax, %rax -; testq $127, %r8 -; cmovzq %rax, %rdi, %rdi -; orq %rdi, %rsi, %rdi -; testq $64, %r8 +; testq $127, %r9 +; cmovzq %rax, %rsi, %rsi +; orq %rsi, %r8, %rsi +; testq $64, %r9 ; cmovzq %rdx, %rax, %rax -; cmovzq %rdi, %rdx, %rdx +; cmovzq %rsi, %rdx, %rdx ; movq %rbp, %rsp ; popq %rbp ; ret @@ -54,14 +56,15 @@ block0(v0: i128, v1: i64): ; movl $64, %ecx ; movq %r11, %r8 ; subq %rcx, %r8, %rcx -; shrq %cl, %rdi, %rdi +; movq %rdi, %r11 +; shrq %cl, %r11, %r11 ; xorq %rax, %rax, %rax ; testq $127, %r8 -; cmovzq %rax, %rdi, %rdi -; orq %rdi, %rsi, %rdi +; cmovzq %rax, %r11, %r11 +; orq %r11, %rsi, %r11 ; testq $64, %r8 ; cmovzq %rdx, %rax, %rax -; cmovzq %rdi, %rdx, %rdx +; cmovzq %r11, %rdx, %rdx ; movq %rbp, %rsp ; popq %rbp ; ret @@ -83,14 +86,15 @@ block0(v0: i128, v1: i32): ; movl $64, %ecx ; movq %r11, %r8 ; subq %rcx, %r8, %rcx -; shrq %cl, %rdi, %rdi +; movq %rdi, %r11 +; shrq %cl, %r11, %r11 ; xorq %rax, %rax, %rax ; testq $127, %r8 -; cmovzq %rax, %rdi, %rdi -; orq %rdi, %rsi, %rdi +; cmovzq %rax, %r11, %r11 +; orq %r11, %rsi, %r11 ; testq $64, %r8 ; cmovzq %rdx, %rax, %rax -; cmovzq %rdi, %rdx, %rdx +; cmovzq %r11, %rdx, %rdx ; movq %rbp, %rsp ; popq %rbp ; ret @@ -112,14 +116,15 @@ block0(v0: i128, v1: i16): ; movl $64, %ecx ; movq %r11, %r8 ; subq %rcx, %r8, %rcx -; shrq %cl, %rdi, %rdi +; movq %rdi, %r11 +; shrq %cl, %r11, %r11 ; xorq %rax, %rax, %rax ; testq $127, %r8 -; cmovzq %rax, %rdi, %rdi -; orq %rdi, %rsi, %rdi +; cmovzq %rax, %r11, %r11 +; orq %r11, %rsi, %r11 ; testq $64, %r8 ; cmovzq %rdx, %rax, %rax -; cmovzq %rdi, %rdx, %rdx +; cmovzq %r11, %rdx, %rdx ; movq %rbp, %rsp ; popq %rbp ; ret @@ -141,14 +146,15 @@ block0(v0: i128, v1: i8): ; movl $64, %ecx ; movq %r11, %r8 ; subq %rcx, %r8, %rcx -; shrq %cl, %rdi, %rdi +; movq %rdi, %r11 +; shrq %cl, %r11, %r11 ; xorq %rax, %rax, %rax ; testq $127, %r8 -; cmovzq %rax, %rdi, %rdi -; orq %rdi, %rsi, %rdi +; cmovzq %rax, %r11, %r11 +; orq %r11, %rsi, %r11 ; testq $64, %r8 ; cmovzq %rdx, %rax, %rax -; cmovzq %rdi, %rdx, %rdx +; cmovzq %r11, %rdx, %rdx ; movq %rbp, %rsp ; popq %rbp ; ret @@ -163,8 +169,8 @@ block0(v0: i64, v1: i128): ; movq %rsp, %rbp ; block0: ; movq %rsi, %rcx -; shlq %cl, %rdi, %rdi ; movq %rdi, %rax +; shlq %cl, %rax, %rax ; movq %rbp, %rsp ; popq %rbp ; ret @@ -179,8 +185,8 @@ block0(v0: i32, v1: i128): ; movq %rsp, %rbp ; block0: ; movq %rsi, %rcx -; shll %cl, %edi, %edi ; movq %rdi, %rax +; shll %cl, %eax, %eax ; movq %rbp, %rsp ; popq %rbp ; ret @@ -196,8 +202,8 @@ block0(v0: i16, v1: i128): ; block0: ; movq %rsi, %rcx ; andq %rcx, $15, %rcx -; shlw %cl, %di, %di ; movq %rdi, %rax +; shlw %cl, %ax, %ax ; movq %rbp, %rsp ; popq %rbp ; ret @@ -213,8 +219,8 @@ block0(v0: i8, v1: i128): ; block0: ; movq %rsi, %rcx ; andq %rcx, $7, %rcx -; shlb %cl, %dil, %dil ; movq %rdi, %rax +; shlb %cl, %al, %al ; movq %rbp, %rsp ; popq %rbp ; ret @@ -229,8 +235,8 @@ block0(v0: i64, v1: i64): ; movq %rsp, %rbp ; block0: ; movq %rsi, %rcx -; shlq %cl, %rdi, %rdi ; movq %rdi, %rax +; shlq %cl, %rax, %rax ; movq %rbp, %rsp ; popq %rbp ; ret @@ -245,8 +251,8 @@ block0(v0: i64, v1: i32): ; movq %rsp, %rbp ; block0: ; movq %rsi, %rcx -; shlq %cl, %rdi, %rdi ; movq %rdi, %rax +; shlq %cl, %rax, %rax ; movq %rbp, %rsp ; popq %rbp ; ret @@ -261,8 +267,8 @@ block0(v0: i64, v1: i16): ; movq %rsp, %rbp ; block0: ; movq %rsi, %rcx -; shlq %cl, %rdi, %rdi ; movq %rdi, %rax +; shlq %cl, %rax, %rax ; movq %rbp, %rsp ; popq %rbp ; ret @@ -277,8 +283,8 @@ block0(v0: i64, v1: i8): ; movq %rsp, %rbp ; block0: ; movq %rsi, %rcx -; shlq %cl, %rdi, %rdi ; movq %rdi, %rax +; shlq %cl, %rax, %rax ; movq %rbp, %rsp ; popq %rbp ; ret @@ -293,8 +299,8 @@ block0(v0: i32, v1: i64): ; movq %rsp, %rbp ; block0: ; movq %rsi, %rcx -; shll %cl, %edi, %edi ; movq %rdi, %rax +; shll %cl, %eax, %eax ; movq %rbp, %rsp ; popq %rbp ; ret @@ -309,8 +315,8 @@ block0(v0: i32, v1: i32): ; movq %rsp, %rbp ; block0: ; movq %rsi, %rcx -; shll %cl, %edi, %edi ; movq %rdi, %rax +; shll %cl, %eax, %eax ; movq %rbp, %rsp ; popq %rbp ; ret @@ -325,8 +331,8 @@ block0(v0: i32, v1: i16): ; movq %rsp, %rbp ; block0: ; movq %rsi, %rcx -; shll %cl, %edi, %edi ; movq %rdi, %rax +; shll %cl, %eax, %eax ; movq %rbp, %rsp ; popq %rbp ; ret @@ -341,8 +347,8 @@ block0(v0: i32, v1: i8): ; movq %rsp, %rbp ; block0: ; movq %rsi, %rcx -; shll %cl, %edi, %edi ; movq %rdi, %rax +; shll %cl, %eax, %eax ; movq %rbp, %rsp ; popq %rbp ; ret @@ -358,8 +364,8 @@ block0(v0: i16, v1: i64): ; block0: ; movq %rsi, %rcx ; andq %rcx, $15, %rcx -; shlw %cl, %di, %di ; movq %rdi, %rax +; shlw %cl, %ax, %ax ; movq %rbp, %rsp ; popq %rbp ; ret @@ -375,8 +381,8 @@ block0(v0: i16, v1: i32): ; block0: ; movq %rsi, %rcx ; andq %rcx, $15, %rcx -; shlw %cl, %di, %di ; movq %rdi, %rax +; shlw %cl, %ax, %ax ; movq %rbp, %rsp ; popq %rbp ; ret @@ -392,8 +398,8 @@ block0(v0: i16, v1: i16): ; block0: ; movq %rsi, %rcx ; andq %rcx, $15, %rcx -; shlw %cl, %di, %di ; movq %rdi, %rax +; shlw %cl, %ax, %ax ; movq %rbp, %rsp ; popq %rbp ; ret @@ -409,8 +415,8 @@ block0(v0: i16, v1: i8): ; block0: ; movq %rsi, %rcx ; andq %rcx, $15, %rcx -; shlw %cl, %di, %di ; movq %rdi, %rax +; shlw %cl, %ax, %ax ; movq %rbp, %rsp ; popq %rbp ; ret @@ -426,8 +432,8 @@ block0(v0: i8, v1: i64): ; block0: ; movq %rsi, %rcx ; andq %rcx, $7, %rcx -; shlb %cl, %dil, %dil ; movq %rdi, %rax +; shlb %cl, %al, %al ; movq %rbp, %rsp ; popq %rbp ; ret @@ -443,8 +449,8 @@ block0(v0: i8, v1: i32): ; block0: ; movq %rsi, %rcx ; andq %rcx, $7, %rcx -; shlb %cl, %dil, %dil ; movq %rdi, %rax +; shlb %cl, %al, %al ; movq %rbp, %rsp ; popq %rbp ; ret @@ -460,8 +466,8 @@ block0(v0: i8, v1: i16): ; block0: ; movq %rsi, %rcx ; andq %rcx, $7, %rcx -; shlb %cl, %dil, %dil ; movq %rdi, %rax +; shlb %cl, %al, %al ; movq %rbp, %rsp ; popq %rbp ; ret @@ -477,8 +483,8 @@ block0(v0: i8, v1: i8): ; block0: ; movq %rsi, %rcx ; andq %rcx, $7, %rcx -; shlb %cl, %dil, %dil ; movq %rdi, %rax +; shlb %cl, %al, %al ; movq %rbp, %rsp ; popq %rbp ; ret @@ -492,8 +498,8 @@ block0(v0: i64): ; pushq %rbp ; movq %rsp, %rbp ; block0: -; shlq $1, %rdi, %rdi ; movq %rdi, %rax +; shlq $1, %rax, %rax ; movq %rbp, %rsp ; popq %rbp ; ret @@ -507,8 +513,8 @@ block0(v0: i32): ; pushq %rbp ; movq %rsp, %rbp ; block0: -; shll $1, %edi, %edi ; movq %rdi, %rax +; shll $1, %eax, %eax ; movq %rbp, %rsp ; popq %rbp ; ret @@ -522,8 +528,8 @@ block0(v0: i16): ; pushq %rbp ; movq %rsp, %rbp ; block0: -; shlw $1, %di, %di ; movq %rdi, %rax +; shlw $1, %ax, %ax ; movq %rbp, %rsp ; popq %rbp ; ret @@ -537,8 +543,8 @@ block0(v0: i8): ; pushq %rbp ; movq %rsp, %rbp ; block0: -; shlb $1, %dil, %dil ; movq %rdi, %rax +; shlb $1, %al, %al ; movq %rbp, %rsp ; popq %rbp ; ret diff --git a/cranelift/filetests/filetests/isa/x64/load-op.clif b/cranelift/filetests/filetests/isa/x64/load-op.clif index a2b3e8a019..dff0e567b0 100644 --- a/cranelift/filetests/filetests/isa/x64/load-op.clif +++ b/cranelift/filetests/filetests/isa/x64/load-op.clif @@ -11,8 +11,8 @@ block0(v0: i64, v1: i32): ; pushq %rbp ; movq %rsp, %rbp ; block0: -; addl %esi, 0(%rdi), %esi ; movq %rsi, %rax +; addl %eax, 0(%rdi), %eax ; movq %rbp, %rsp ; popq %rbp ; ret @@ -27,8 +27,8 @@ block0(v0: i64, v1: i32): ; pushq %rbp ; movq %rsp, %rbp ; block0: -; addl %esi, 0(%rdi), %esi ; movq %rsi, %rax +; addl %eax, 0(%rdi), %eax ; movq %rbp, %rsp ; popq %rbp ; ret @@ -43,8 +43,8 @@ block0(v0: i64, v1: i64): ; pushq %rbp ; movq %rsp, %rbp ; block0: -; addq %rsi, 0(%rdi), %rsi ; movq %rsi, %rax +; addq %rax, 0(%rdi), %rax ; movq %rbp, %rsp ; popq %rbp ; ret @@ -56,14 +56,11 @@ block0(v0: i64, v1: i64): return v3 } -;; test narrow loads: 8-bit load should not merge because the `addl` is 32 bits -;; and would load 32 bits from memory, which may go beyond the end of the heap. - ; pushq %rbp ; movq %rsp, %rbp ; block0: -; addq %rsi, 0(%rdi), %rsi ; movq %rsi, %rax +; addq %rax, 0(%rdi), %rax ; movq %rbp, %rsp ; popq %rbp ; ret diff --git a/cranelift/filetests/filetests/isa/x64/narrowing.clif b/cranelift/filetests/filetests/isa/x64/narrowing.clif index e0c8b6f4fc..7ca0306967 100644 --- a/cranelift/filetests/filetests/isa/x64/narrowing.clif +++ b/cranelift/filetests/filetests/isa/x64/narrowing.clif @@ -44,8 +44,9 @@ block0(v0: f64x2): ; cmppd $0, %xmm5, %xmm0, %xmm5 ; movupd const(0), %xmm6 ; andps %xmm5, %xmm6, %xmm5 -; minpd %xmm0, %xmm5, %xmm0 -; cvttpd2dq %xmm0, %xmm0 +; movdqa %xmm0, %xmm9 +; minpd %xmm9, %xmm5, %xmm9 +; cvttpd2dq %xmm9, %xmm0 ; movq %rbp, %rsp ; popq %rbp ; ret diff --git a/cranelift/filetests/filetests/isa/x64/nearest-libcall.clif b/cranelift/filetests/filetests/isa/x64/nearest-libcall.clif index 8de2c110be..53df3468aa 100644 --- a/cranelift/filetests/filetests/isa/x64/nearest-libcall.clif +++ b/cranelift/filetests/filetests/isa/x64/nearest-libcall.clif @@ -10,8 +10,8 @@ block0(v0: f32): ; pushq %rbp ; movq %rsp, %rbp ; block0: -; load_ext_name %NearestF32+0, %r8 -; call *%r8 +; load_ext_name %NearestF32+0, %rdx +; call *%rdx ; movq %rbp, %rsp ; popq %rbp ; ret @@ -25,8 +25,8 @@ block0(v0: f64): ; pushq %rbp ; movq %rsp, %rbp ; block0: -; load_ext_name %NearestF64+0, %r8 -; call *%r8 +; load_ext_name %NearestF64+0, %rdx +; call *%rdx ; movq %rbp, %rsp ; popq %rbp ; ret diff --git a/cranelift/filetests/filetests/isa/x64/popcnt.clif b/cranelift/filetests/filetests/isa/x64/popcnt.clif index 4f0be7407d..4230200d15 100644 --- a/cranelift/filetests/filetests/isa/x64/popcnt.clif +++ b/cranelift/filetests/filetests/isa/x64/popcnt.clif @@ -14,16 +14,17 @@ block0(v0: i64): ; shrq $1, %rax, %rax ; movabsq $8608480567731124087, %r8 ; andq %rax, %r8, %rax -; subq %rdi, %rax, %rdi +; movq %rdi, %r9 +; subq %r9, %rax, %r9 ; shrq $1, %rax, %rax ; andq %rax, %r8, %rax -; subq %rdi, %rax, %rdi +; subq %r9, %rax, %r9 ; shrq $1, %rax, %rax ; andq %rax, %r8, %rax -; subq %rdi, %rax, %rdi -; movq %rdi, %rax +; subq %r9, %rax, %r9 +; movq %r9, %rax ; shrq $4, %rax, %rax -; addq %rax, %rdi, %rax +; addq %rax, %r9, %rax ; movabsq $1085102592571150095, %rsi ; andq %rax, %rsi, %rax ; movabsq $72340172838076673, %rdx @@ -80,16 +81,17 @@ block0(v0: i32): ; shrl $1, %eax, %eax ; movl $2004318071, %r8d ; andl %eax, %r8d, %eax -; subl %edi, %eax, %edi +; movq %rdi, %r9 +; subl %r9d, %eax, %r9d ; shrl $1, %eax, %eax ; andl %eax, %r8d, %eax -; subl %edi, %eax, %edi +; subl %r9d, %eax, %r9d ; shrl $1, %eax, %eax ; andl %eax, %r8d, %eax -; subl %edi, %eax, %edi -; movq %rdi, %rax +; subl %r9d, %eax, %r9d +; movq %r9, %rax ; shrl $4, %eax, %eax -; addl %eax, %edi, %eax +; addl %eax, %r9d, %eax ; andl %eax, $252645135, %eax ; imull %eax, $16843009, %eax ; shrl $24, %eax, %eax diff --git a/cranelift/filetests/filetests/isa/x64/sdiv.clif b/cranelift/filetests/filetests/isa/x64/sdiv.clif index 33c402a5a0..543d7b82a3 100644 --- a/cranelift/filetests/filetests/isa/x64/sdiv.clif +++ b/cranelift/filetests/filetests/isa/x64/sdiv.clif @@ -12,7 +12,6 @@ block0(v0: i8, v1: i8): ; block0: ; movq %rdi, %rax ; cbw %al, %al -; movq %rax, %r11 ; idiv %al, (none), %sil, %al, (none) ; movq %rbp, %rsp ; popq %rbp @@ -29,7 +28,6 @@ block0(v0: i16, v1: i16): ; block0: ; movq %rdi, %rax ; cwd %ax, %dx -; movq %rdx, %rcx ; idiv %ax, %dx, %si, %ax, %dx ; movq %rbp, %rsp ; popq %rbp @@ -46,7 +44,6 @@ block0(v0: i32, v1: i32): ; block0: ; movq %rdi, %rax ; cdq %eax, %edx -; movq %rdx, %rcx ; idiv %eax, %edx, %esi, %eax, %edx ; movq %rbp, %rsp ; popq %rbp @@ -63,7 +60,6 @@ block0(v0: i64, v1: i64): ; block0: ; movq %rdi, %rax ; cqo %rax, %rdx -; movq %rdx, %rcx ; idiv %rax, %rdx, %rsi, %rax, %rdx ; movq %rbp, %rsp ; popq %rbp diff --git a/cranelift/filetests/filetests/isa/x64/select-i128.clif b/cranelift/filetests/filetests/isa/x64/select-i128.clif index c88e3c3c2a..14dd0a352f 100644 --- a/cranelift/filetests/filetests/isa/x64/select-i128.clif +++ b/cranelift/filetests/filetests/isa/x64/select-i128.clif @@ -14,10 +14,11 @@ block0(v0: i32, v1: i128, v2: i128): ; movq %rsp, %rbp ; block0: ; cmpl $42, %edi -; cmovzq %rsi, %rcx, %rcx -; cmovzq %rdx, %r8, %r8 ; movq %rcx, %rax -; movq %r8, %rdx +; cmovzq %rsi, %rax, %rax +; movq %r8, %rcx +; cmovzq %rdx, %rcx, %rcx +; movq %rcx, %rdx ; movq %rbp, %rsp ; popq %rbp ; ret @@ -33,12 +34,12 @@ block0(v0: f32, v1: i128, v2: i128): ; movq %rsp, %rbp ; block0: ; ucomiss %xmm0, %xmm0 -; cmovnzq %rdx, %rdi, %rdi -; cmovpq %rdx, %rdi, %rdi -; cmovnzq %rcx, %rsi, %rsi -; cmovpq %rcx, %rsi, %rsi ; movq %rdi, %rax +; cmovnzq %rdx, %rax, %rax +; cmovpq %rdx, %rax, %rax ; movq %rsi, %rdx +; cmovnzq %rcx, %rdx, %rdx +; cmovpq %rcx, %rdx, %rdx ; movq %rbp, %rsp ; popq %rbp ; ret diff --git a/cranelift/filetests/filetests/isa/x64/simd-bitwise-compile.clif b/cranelift/filetests/filetests/isa/x64/simd-bitwise-compile.clif index f778d8e024..69141f4750 100644 --- a/cranelift/filetests/filetests/isa/x64/simd-bitwise-compile.clif +++ b/cranelift/filetests/filetests/isa/x64/simd-bitwise-compile.clif @@ -159,8 +159,9 @@ block0(v0: b16x8, v1: i16x8, v2: i16x8): ; pushq %rbp ; movq %rsp, %rbp ; block0: -; pblendvb %xmm2, %xmm1, %xmm2 -; movdqa %xmm2, %xmm0 +; movdqa %xmm2, %xmm6 +; pblendvb %xmm6, %xmm1, %xmm6 +; movdqa %xmm6, %xmm0 ; movq %rbp, %rsp ; popq %rbp ; ret @@ -174,8 +175,9 @@ block0(v0: b32x4, v1: f32x4, v2: f32x4): ; pushq %rbp ; movq %rsp, %rbp ; block0: -; blendvps %xmm2, %xmm1, %xmm2 -; movdqa %xmm2, %xmm0 +; movdqa %xmm2, %xmm6 +; blendvps %xmm6, %xmm1, %xmm6 +; movdqa %xmm6, %xmm0 ; movq %rbp, %rsp ; popq %rbp ; ret @@ -189,8 +191,9 @@ block0(v0: b64x2, v1: f64x2, v2: f64x2): ; pushq %rbp ; movq %rsp, %rbp ; block0: -; blendvpd %xmm2, %xmm1, %xmm2 -; movdqa %xmm2, %xmm0 +; movdqa %xmm2, %xmm6 +; blendvpd %xmm6, %xmm1, %xmm6 +; movdqa %xmm6, %xmm0 ; movq %rbp, %rsp ; popq %rbp ; ret @@ -206,12 +209,13 @@ block0(v0: i32): ; movq %rsp, %rbp ; block0: ; movdqu const(1), %xmm0 -; andq %rdi, $7, %rdi -; movd %edi, %xmm6 +; movq %rdi, %r11 +; andq %r11, $7, %r11 +; movd %r11d, %xmm6 ; psllw %xmm0, %xmm6, %xmm0 -; lea const(0), %rax -; shlq $4, %rdi, %rdi -; movdqu 0(%rax,%rdi,1), %xmm14 +; lea const(0), %rdi +; shlq $4, %r11, %r11 +; movdqu 0(%rdi,%r11,1), %xmm14 ; pand %xmm0, %xmm14, %xmm0 ; movq %rbp, %rsp ; popq %rbp @@ -252,12 +256,13 @@ block0(v0: i32): ; movq %rsp, %rbp ; block0: ; movdqu const(0), %xmm9 -; andq %rdi, $7, %rdi +; movq %rdi, %r10 +; andq %r10, $7, %r10 ; movdqa %xmm9, %xmm0 ; punpcklbw %xmm0, %xmm9, %xmm0 ; punpckhbw %xmm9, %xmm9, %xmm9 -; addl %edi, $8, %edi -; movd %edi, %xmm12 +; addl %r10d, $8, %r10d +; movd %r10d, %xmm12 ; psraw %xmm0, %xmm12, %xmm0 ; psraw %xmm9, %xmm12, %xmm9 ; packsswb %xmm0, %xmm9, %xmm0 @@ -279,14 +284,14 @@ block0(v0: i8x16, v1: i32): ; movdqa %xmm0, %xmm14 ; punpcklbw %xmm14, %xmm0, %xmm14 ; movdqa %xmm14, %xmm13 -; punpckhbw %xmm0, %xmm0, %xmm0 -; movdqa %xmm0, %xmm6 +; movdqa %xmm0, %xmm14 +; punpckhbw %xmm14, %xmm0, %xmm14 ; addl %r11d, $8, %r11d -; movd %r11d, %xmm14 +; movd %r11d, %xmm15 ; movdqa %xmm13, %xmm0 -; psraw %xmm0, %xmm14, %xmm0 -; psraw %xmm6, %xmm14, %xmm6 -; packsswb %xmm0, %xmm6, %xmm0 +; psraw %xmm0, %xmm15, %xmm0 +; psraw %xmm14, %xmm15, %xmm14 +; packsswb %xmm0, %xmm14, %xmm0 ; movq %rbp, %rsp ; popq %rbp ; ret diff --git a/cranelift/filetests/filetests/isa/x64/simd-logical-compile.clif b/cranelift/filetests/filetests/isa/x64/simd-logical-compile.clif index 2be24e4b18..40ab8cc76b 100644 --- a/cranelift/filetests/filetests/isa/x64/simd-logical-compile.clif +++ b/cranelift/filetests/filetests/isa/x64/simd-logical-compile.clif @@ -42,8 +42,9 @@ block0(v0: i64x2): ; movq %rsp, %rbp ; block0: ; pxor %xmm3, %xmm3, %xmm3 -; pcmpeqq %xmm0, %xmm3, %xmm0 -; ptest %xmm0, %xmm0 +; movdqa %xmm0, %xmm5 +; pcmpeqq %xmm5, %xmm3, %xmm5 +; ptest %xmm5, %xmm5 ; setz %al ; movq %rbp, %rsp ; popq %rbp diff --git a/cranelift/filetests/filetests/isa/x64/srem.clif b/cranelift/filetests/filetests/isa/x64/srem.clif index a176ef793c..131d7ac89f 100644 --- a/cranelift/filetests/filetests/isa/x64/srem.clif +++ b/cranelift/filetests/filetests/isa/x64/srem.clif @@ -10,9 +10,8 @@ block0(v0: i8, v1: i8): ; pushq %rbp ; movq %rsp, %rbp ; block0: -; xorl %r10d, %r10d, %r10d ; movq %rdi, %rax -; movq %r10, %rdx +; xorl %edx, %edx, %edx ; srem_seq %al, %dl, %sil, %al, %dl, tmp=(none) ; shrq $8, %rax, %rax ; movq %rbp, %rsp @@ -28,9 +27,8 @@ block0(v0: i16, v1: i16): ; pushq %rbp ; movq %rsp, %rbp ; block0: -; xorl %r10d, %r10d, %r10d ; movq %rdi, %rax -; movq %r10, %rdx +; xorl %edx, %edx, %edx ; srem_seq %ax, %dx, %si, %ax, %dx, tmp=(none) ; movq %rdx, %rax ; movq %rbp, %rsp @@ -46,9 +44,8 @@ block0(v0: i32, v1: i32): ; pushq %rbp ; movq %rsp, %rbp ; block0: -; xorl %r10d, %r10d, %r10d ; movq %rdi, %rax -; movq %r10, %rdx +; xorl %edx, %edx, %edx ; srem_seq %eax, %edx, %esi, %eax, %edx, tmp=(none) ; movq %rdx, %rax ; movq %rbp, %rsp @@ -64,9 +61,8 @@ block0(v0: i64, v1: i64): ; pushq %rbp ; movq %rsp, %rbp ; block0: -; xorl %r10d, %r10d, %r10d ; movq %rdi, %rax -; movq %r10, %rdx +; xorl %edx, %edx, %edx ; srem_seq %rax, %rdx, %rsi, %rax, %rdx, tmp=(none) ; movq %rdx, %rax ; movq %rbp, %rsp diff --git a/cranelift/filetests/filetests/isa/x64/sshr.clif b/cranelift/filetests/filetests/isa/x64/sshr.clif index b92e9c9001..50f88e0612 100644 --- a/cranelift/filetests/filetests/isa/x64/sshr.clif +++ b/cranelift/filetests/filetests/isa/x64/sshr.clif @@ -17,25 +17,26 @@ block0(v0: i128, v1: i8): ; movq %rsp, %rbp ; block0: ; movzbq %dl, %rcx -; shrq %cl, %rdi, %rdi -; movq %rsi, %rdx -; sarq %cl, %rdx, %rdx +; movq %rdi, %r8 +; shrq %cl, %r8, %r8 +; movq %rsi, %rdi +; sarq %cl, %rdi, %rdi ; movq %rcx, %rax ; movl $64, %ecx -; movq %rax, %r8 -; subq %rcx, %r8, %rcx +; movq %rax, %r9 +; subq %rcx, %r9, %rcx ; movq %rsi, %r11 ; shlq %cl, %r11, %r11 ; xorq %rax, %rax, %rax -; testq $127, %r8 +; testq $127, %r9 ; cmovzq %rax, %r11, %r11 -; orq %rdi, %r11, %rdi -; sarq $63, %rsi, %rsi -; testq $64, %r8 -; movq %rdx, %rax -; cmovzq %rdi, %rax, %rax -; cmovzq %rdx, %rsi, %rsi +; orq %r8, %r11, %r8 ; movq %rsi, %rdx +; sarq $63, %rdx, %rdx +; testq $64, %r9 +; movq %rdi, %rax +; cmovzq %r8, %rax, %rax +; cmovzq %rdi, %rdx, %rdx ; movq %rbp, %rsp ; popq %rbp ; ret @@ -50,23 +51,25 @@ block0(v0: i128, v1: i64): ; movq %rsp, %rbp ; block0: ; movq %rdx, %rcx -; shrq %cl, %rdi, %rdi +; movq %rdi, %r8 +; shrq %cl, %r8, %r8 ; movq %rsi, %r11 ; sarq %cl, %r11, %r11 ; movl $64, %ecx -; subq %rcx, %rdx, %rcx +; movq %rdx, %r9 +; subq %rcx, %r9, %rcx ; movq %rsi, %r10 ; shlq %cl, %r10, %r10 -; xorq %rax, %rax, %rax -; testq $127, %rdx -; cmovzq %rax, %r10, %r10 -; orq %rdi, %r10, %rdi -; sarq $63, %rsi, %rsi -; testq $64, %rdx -; movq %r11, %rax -; cmovzq %rdi, %rax, %rax -; cmovzq %r11, %rsi, %rsi +; xorq %rdi, %rdi, %rdi +; testq $127, %r9 +; cmovzq %rdi, %r10, %r10 +; orq %r8, %r10, %r8 ; movq %rsi, %rdx +; sarq $63, %rdx, %rdx +; testq $64, %r9 +; movq %r11, %rax +; cmovzq %r8, %rax, %rax +; cmovzq %r11, %rdx, %rdx ; movq %rbp, %rsp ; popq %rbp ; ret @@ -81,23 +84,25 @@ block0(v0: i128, v1: i32): ; movq %rsp, %rbp ; block0: ; movq %rdx, %rcx -; shrq %cl, %rdi, %rdi +; movq %rdi, %r8 +; shrq %cl, %r8, %r8 ; movq %rsi, %r11 ; sarq %cl, %r11, %r11 ; movl $64, %ecx -; subq %rcx, %rdx, %rcx +; movq %rdx, %r9 +; subq %rcx, %r9, %rcx ; movq %rsi, %r10 ; shlq %cl, %r10, %r10 -; xorq %rax, %rax, %rax -; testq $127, %rdx -; cmovzq %rax, %r10, %r10 -; orq %rdi, %r10, %rdi -; sarq $63, %rsi, %rsi -; testq $64, %rdx -; movq %r11, %rax -; cmovzq %rdi, %rax, %rax -; cmovzq %r11, %rsi, %rsi +; xorq %rdi, %rdi, %rdi +; testq $127, %r9 +; cmovzq %rdi, %r10, %r10 +; orq %r8, %r10, %r8 ; movq %rsi, %rdx +; sarq $63, %rdx, %rdx +; testq $64, %r9 +; movq %r11, %rax +; cmovzq %r8, %rax, %rax +; cmovzq %r11, %rdx, %rdx ; movq %rbp, %rsp ; popq %rbp ; ret @@ -112,23 +117,25 @@ block0(v0: i128, v1: i16): ; movq %rsp, %rbp ; block0: ; movq %rdx, %rcx -; shrq %cl, %rdi, %rdi +; movq %rdi, %r8 +; shrq %cl, %r8, %r8 ; movq %rsi, %r11 ; sarq %cl, %r11, %r11 ; movl $64, %ecx -; subq %rcx, %rdx, %rcx +; movq %rdx, %r9 +; subq %rcx, %r9, %rcx ; movq %rsi, %r10 ; shlq %cl, %r10, %r10 -; xorq %rax, %rax, %rax -; testq $127, %rdx -; cmovzq %rax, %r10, %r10 -; orq %rdi, %r10, %rdi -; sarq $63, %rsi, %rsi -; testq $64, %rdx -; movq %r11, %rax -; cmovzq %rdi, %rax, %rax -; cmovzq %r11, %rsi, %rsi +; xorq %rdi, %rdi, %rdi +; testq $127, %r9 +; cmovzq %rdi, %r10, %r10 +; orq %r8, %r10, %r8 ; movq %rsi, %rdx +; sarq $63, %rdx, %rdx +; testq $64, %r9 +; movq %r11, %rax +; cmovzq %r8, %rax, %rax +; cmovzq %r11, %rdx, %rdx ; movq %rbp, %rsp ; popq %rbp ; ret @@ -143,23 +150,25 @@ block0(v0: i128, v1: i8): ; movq %rsp, %rbp ; block0: ; movq %rdx, %rcx -; shrq %cl, %rdi, %rdi +; movq %rdi, %r8 +; shrq %cl, %r8, %r8 ; movq %rsi, %r11 ; sarq %cl, %r11, %r11 ; movl $64, %ecx -; subq %rcx, %rdx, %rcx +; movq %rdx, %r9 +; subq %rcx, %r9, %rcx ; movq %rsi, %r10 ; shlq %cl, %r10, %r10 -; xorq %rax, %rax, %rax -; testq $127, %rdx -; cmovzq %rax, %r10, %r10 -; orq %rdi, %r10, %rdi -; sarq $63, %rsi, %rsi -; testq $64, %rdx -; movq %r11, %rax -; cmovzq %rdi, %rax, %rax -; cmovzq %r11, %rsi, %rsi +; xorq %rdi, %rdi, %rdi +; testq $127, %r9 +; cmovzq %rdi, %r10, %r10 +; orq %r8, %r10, %r8 ; movq %rsi, %rdx +; sarq $63, %rdx, %rdx +; testq $64, %r9 +; movq %r11, %rax +; cmovzq %r8, %rax, %rax +; cmovzq %r11, %rdx, %rdx ; movq %rbp, %rsp ; popq %rbp ; ret @@ -174,8 +183,8 @@ block0(v0: i64, v1: i128): ; movq %rsp, %rbp ; block0: ; movq %rsi, %rcx -; sarq %cl, %rdi, %rdi ; movq %rdi, %rax +; sarq %cl, %rax, %rax ; movq %rbp, %rsp ; popq %rbp ; ret @@ -190,8 +199,8 @@ block0(v0: i32, v1: i128): ; movq %rsp, %rbp ; block0: ; movq %rsi, %rcx -; sarl %cl, %edi, %edi ; movq %rdi, %rax +; sarl %cl, %eax, %eax ; movq %rbp, %rsp ; popq %rbp ; ret @@ -207,8 +216,8 @@ block0(v0: i16, v1: i128): ; block0: ; movq %rsi, %rcx ; andq %rcx, $15, %rcx -; sarw %cl, %di, %di ; movq %rdi, %rax +; sarw %cl, %ax, %ax ; movq %rbp, %rsp ; popq %rbp ; ret @@ -224,8 +233,8 @@ block0(v0: i8, v1: i128): ; block0: ; movq %rsi, %rcx ; andq %rcx, $7, %rcx -; sarb %cl, %dil, %dil ; movq %rdi, %rax +; sarb %cl, %al, %al ; movq %rbp, %rsp ; popq %rbp ; ret @@ -240,8 +249,8 @@ block0(v0: i64, v1: i64): ; movq %rsp, %rbp ; block0: ; movq %rsi, %rcx -; sarq %cl, %rdi, %rdi ; movq %rdi, %rax +; sarq %cl, %rax, %rax ; movq %rbp, %rsp ; popq %rbp ; ret @@ -256,8 +265,8 @@ block0(v0: i64, v1: i32): ; movq %rsp, %rbp ; block0: ; movq %rsi, %rcx -; sarq %cl, %rdi, %rdi ; movq %rdi, %rax +; sarq %cl, %rax, %rax ; movq %rbp, %rsp ; popq %rbp ; ret @@ -272,8 +281,8 @@ block0(v0: i64, v1: i16): ; movq %rsp, %rbp ; block0: ; movq %rsi, %rcx -; sarq %cl, %rdi, %rdi ; movq %rdi, %rax +; sarq %cl, %rax, %rax ; movq %rbp, %rsp ; popq %rbp ; ret @@ -288,8 +297,8 @@ block0(v0: i64, v1: i8): ; movq %rsp, %rbp ; block0: ; movq %rsi, %rcx -; sarq %cl, %rdi, %rdi ; movq %rdi, %rax +; sarq %cl, %rax, %rax ; movq %rbp, %rsp ; popq %rbp ; ret @@ -304,8 +313,8 @@ block0(v0: i32, v1: i64): ; movq %rsp, %rbp ; block0: ; movq %rsi, %rcx -; sarl %cl, %edi, %edi ; movq %rdi, %rax +; sarl %cl, %eax, %eax ; movq %rbp, %rsp ; popq %rbp ; ret @@ -320,8 +329,8 @@ block0(v0: i32, v1: i32): ; movq %rsp, %rbp ; block0: ; movq %rsi, %rcx -; sarl %cl, %edi, %edi ; movq %rdi, %rax +; sarl %cl, %eax, %eax ; movq %rbp, %rsp ; popq %rbp ; ret @@ -336,8 +345,8 @@ block0(v0: i32, v1: i16): ; movq %rsp, %rbp ; block0: ; movq %rsi, %rcx -; sarl %cl, %edi, %edi ; movq %rdi, %rax +; sarl %cl, %eax, %eax ; movq %rbp, %rsp ; popq %rbp ; ret @@ -352,8 +361,8 @@ block0(v0: i32, v1: i8): ; movq %rsp, %rbp ; block0: ; movq %rsi, %rcx -; sarl %cl, %edi, %edi ; movq %rdi, %rax +; sarl %cl, %eax, %eax ; movq %rbp, %rsp ; popq %rbp ; ret @@ -369,8 +378,8 @@ block0(v0: i16, v1: i64): ; block0: ; movq %rsi, %rcx ; andq %rcx, $15, %rcx -; sarw %cl, %di, %di ; movq %rdi, %rax +; sarw %cl, %ax, %ax ; movq %rbp, %rsp ; popq %rbp ; ret @@ -386,8 +395,8 @@ block0(v0: i16, v1: i32): ; block0: ; movq %rsi, %rcx ; andq %rcx, $15, %rcx -; sarw %cl, %di, %di ; movq %rdi, %rax +; sarw %cl, %ax, %ax ; movq %rbp, %rsp ; popq %rbp ; ret @@ -403,8 +412,8 @@ block0(v0: i16, v1: i16): ; block0: ; movq %rsi, %rcx ; andq %rcx, $15, %rcx -; sarw %cl, %di, %di ; movq %rdi, %rax +; sarw %cl, %ax, %ax ; movq %rbp, %rsp ; popq %rbp ; ret @@ -420,8 +429,8 @@ block0(v0: i16, v1: i8): ; block0: ; movq %rsi, %rcx ; andq %rcx, $15, %rcx -; sarw %cl, %di, %di ; movq %rdi, %rax +; sarw %cl, %ax, %ax ; movq %rbp, %rsp ; popq %rbp ; ret @@ -437,8 +446,8 @@ block0(v0: i8, v1: i64): ; block0: ; movq %rsi, %rcx ; andq %rcx, $7, %rcx -; sarb %cl, %dil, %dil ; movq %rdi, %rax +; sarb %cl, %al, %al ; movq %rbp, %rsp ; popq %rbp ; ret @@ -454,8 +463,8 @@ block0(v0: i8, v1: i32): ; block0: ; movq %rsi, %rcx ; andq %rcx, $7, %rcx -; sarb %cl, %dil, %dil ; movq %rdi, %rax +; sarb %cl, %al, %al ; movq %rbp, %rsp ; popq %rbp ; ret @@ -471,8 +480,8 @@ block0(v0: i8, v1: i16): ; block0: ; movq %rsi, %rcx ; andq %rcx, $7, %rcx -; sarb %cl, %dil, %dil ; movq %rdi, %rax +; sarb %cl, %al, %al ; movq %rbp, %rsp ; popq %rbp ; ret @@ -488,13 +497,12 @@ block0(v0: i8, v1: i8): ; block0: ; movq %rsi, %rcx ; andq %rcx, $7, %rcx -; sarb %cl, %dil, %dil ; movq %rdi, %rax +; sarb %cl, %al, %al ; movq %rbp, %rsp ; popq %rbp ; ret - function %sshr_i64_const(i64) -> i64 { block0(v0: i64): v1 = sshr_imm.i64 v0, 65 @@ -504,8 +512,8 @@ block0(v0: i64): ; pushq %rbp ; movq %rsp, %rbp ; block0: -; sarq $1, %rdi, %rdi ; movq %rdi, %rax +; sarq $1, %rax, %rax ; movq %rbp, %rsp ; popq %rbp ; ret @@ -519,8 +527,8 @@ block0(v0: i32): ; pushq %rbp ; movq %rsp, %rbp ; block0: -; sarl $1, %edi, %edi ; movq %rdi, %rax +; sarl $1, %eax, %eax ; movq %rbp, %rsp ; popq %rbp ; ret @@ -534,8 +542,8 @@ block0(v0: i16): ; pushq %rbp ; movq %rsp, %rbp ; block0: -; sarw $1, %di, %di ; movq %rdi, %rax +; sarw $1, %ax, %ax ; movq %rbp, %rsp ; popq %rbp ; ret @@ -549,8 +557,8 @@ block0(v0: i8): ; pushq %rbp ; movq %rsp, %rbp ; block0: -; sarb $1, %dil, %dil ; movq %rdi, %rax +; sarb $1, %al, %al ; movq %rbp, %rsp ; popq %rbp ; ret diff --git a/cranelift/filetests/filetests/isa/x64/struct-arg.clif b/cranelift/filetests/filetests/isa/x64/struct-arg.clif index aac589b2ce..e4c5363071 100644 --- a/cranelift/filetests/filetests/isa/x64/struct-arg.clif +++ b/cranelift/filetests/filetests/isa/x64/struct-arg.clif @@ -46,14 +46,13 @@ block0(v0: i64): ; pushq %rbp ; movq %rsp, %rbp ; block0: -; movq %rdi, %r8 +; movq %rdi, %rsi ; subq %rsp, $64, %rsp ; virtual_sp_offset_adjust 64 ; lea 0(%rsp), %rdi -; movq %r8, %rsi ; movl $64, %edx -; load_ext_name %Memcpy+0, %rcx -; call *%rcx +; load_ext_name %Memcpy+0, %rax +; call *%rax ; call User(userextname0) ; addq %rsp, $64, %rsp ; virtual_sp_offset_adjust -64 @@ -72,20 +71,20 @@ block0(v0: i64, v1: i64): ; pushq %rbp ; movq %rsp, %rbp ; subq %rsp, $16, %rsp -; movq %r12, 0(%rsp) +; movq %r14, 0(%rsp) ; block0: -; movq %rdi, %r12 +; movq %rdi, %r14 ; subq %rsp, $64, %rsp ; virtual_sp_offset_adjust 64 ; lea 0(%rsp), %rdi ; movl $64, %edx ; load_ext_name %Memcpy+0, %rcx ; call *%rcx -; movq %r12, %rdi +; movq %r14, %rdi ; call User(userextname0) ; addq %rsp, $64, %rsp ; virtual_sp_offset_adjust -64 -; movq 0(%rsp), %r12 +; movq 0(%rsp), %r14 ; addq %rsp, $16, %rsp ; movq %rbp, %rsp ; popq %rbp @@ -122,28 +121,29 @@ block0(v0: i64, v1: i64, v2: i64): ; pushq %rbp ; movq %rsp, %rbp ; subq %rsp, $16, %rsp -; movq %rbx, 0(%rsp) -; movq %r14, 8(%rsp) +; movq %r13, 0(%rsp) +; movq %r15, 8(%rsp) ; block0: -; movq %rdx, %rbx -; movq %rdi, %r14 +; movq %rdx, %r15 +; movq %rdi, %r13 ; subq %rsp, $192, %rsp ; virtual_sp_offset_adjust 192 ; lea 0(%rsp), %rdi ; movl $128, %edx -; load_ext_name %Memcpy+0, %rcx -; call *%rcx +; load_ext_name %Memcpy+0, %r8 +; call *%r8 ; lea 128(%rsp), %rdi -; movq %rbx, %rsi +; movq %r15, %rsi ; movl $64, %edx -; load_ext_name %Memcpy+0, %rcx -; call *%rcx -; movq %r14, %rdi +; load_ext_name %Memcpy+0, %r8 +; movq %r15, %rsi +; call *%r8 +; movq %r13, %rdi ; call User(userextname0) ; addq %rsp, $192, %rsp ; virtual_sp_offset_adjust -192 -; movq 0(%rsp), %rbx -; movq 8(%rsp), %r14 +; movq 0(%rsp), %r13 +; movq 8(%rsp), %r15 ; addq %rsp, $16, %rsp ; movq %rbp, %rsp ; popq %rbp diff --git a/cranelift/filetests/filetests/isa/x64/struct-ret.clif b/cranelift/filetests/filetests/isa/x64/struct-ret.clif index 788a173c1a..b1fa7ede30 100644 --- a/cranelift/filetests/filetests/isa/x64/struct-ret.clif +++ b/cranelift/filetests/filetests/isa/x64/struct-ret.clif @@ -47,15 +47,10 @@ block0(v0: i64): ; pushq %rbp ; movq %rsp, %rbp -; subq %rsp, $16, %rsp -; movq %r15, 0(%rsp) ; block0: -; movq %rdi, %r15 -; load_ext_name %f4+0, %r8 -; call *%r8 -; movq %r15, %rax -; movq 0(%rsp), %r15 -; addq %rsp, $16, %rsp +; movq %rdi, %rax +; load_ext_name %f4+0, %rdx +; call *%rdx ; movq %rbp, %rsp ; popq %rbp ; ret diff --git a/cranelift/filetests/filetests/isa/x64/traps.clif b/cranelift/filetests/filetests/isa/x64/traps.clif index c1697b987f..fcde18bed7 100644 --- a/cranelift/filetests/filetests/isa/x64/traps.clif +++ b/cranelift/filetests/filetests/isa/x64/traps.clif @@ -22,7 +22,8 @@ block0(v0: i64, v1: i64): ; pushq %rbp ; movq %rsp, %rbp ; block0: -; addq %rdi, %rsi, %rdi +; movq %rdi, %rcx +; addq %rcx, %rsi, %rcx ; jnb ; ud2 user0 ; ; movq %rbp, %rsp ; popq %rbp diff --git a/cranelift/filetests/filetests/isa/x64/trunc-libcall.clif b/cranelift/filetests/filetests/isa/x64/trunc-libcall.clif index 226388a1cb..5256edb2f1 100644 --- a/cranelift/filetests/filetests/isa/x64/trunc-libcall.clif +++ b/cranelift/filetests/filetests/isa/x64/trunc-libcall.clif @@ -10,8 +10,8 @@ block0(v0: f32): ; pushq %rbp ; movq %rsp, %rbp ; block0: -; load_ext_name %TruncF32+0, %r8 -; call *%r8 +; load_ext_name %TruncF32+0, %rdx +; call *%rdx ; movq %rbp, %rsp ; popq %rbp ; ret @@ -25,8 +25,8 @@ block0(v0: f64): ; pushq %rbp ; movq %rsp, %rbp ; block0: -; load_ext_name %TruncF64+0, %r8 -; call *%r8 +; load_ext_name %TruncF64+0, %rdx +; call *%rdx ; movq %rbp, %rsp ; popq %rbp ; ret diff --git a/cranelift/filetests/filetests/isa/x64/udiv.clif b/cranelift/filetests/filetests/isa/x64/udiv.clif index 46dae76eab..71ad6b75ed 100644 --- a/cranelift/filetests/filetests/isa/x64/udiv.clif +++ b/cranelift/filetests/filetests/isa/x64/udiv.clif @@ -10,8 +10,7 @@ block0(v0: i8, v1: i8): ; pushq %rbp ; movq %rsp, %rbp ; block0: -; movzbl %dil, %r9d -; movq %r9, %rax +; movzbl %dil, %eax ; div %al, (none), %sil, %al, (none) ; movq %rbp, %rsp ; popq %rbp @@ -26,9 +25,8 @@ block0(v0: i16, v1: i16): ; pushq %rbp ; movq %rsp, %rbp ; block0: -; movl $0, %r10d ; movq %rdi, %rax -; movq %r10, %rdx +; movl $0, %edx ; div %ax, %dx, %si, %ax, %dx ; movq %rbp, %rsp ; popq %rbp @@ -43,9 +41,8 @@ block0(v0: i32, v1: i32): ; pushq %rbp ; movq %rsp, %rbp ; block0: -; movl $0, %r10d ; movq %rdi, %rax -; movq %r10, %rdx +; movl $0, %edx ; div %eax, %edx, %esi, %eax, %edx ; movq %rbp, %rsp ; popq %rbp @@ -60,9 +57,8 @@ block0(v0: i64, v1: i64): ; pushq %rbp ; movq %rsp, %rbp ; block0: -; movl $0, %r10d ; movq %rdi, %rax -; movq %r10, %rdx +; movl $0, %edx ; div %rax, %rdx, %rsi, %rax, %rdx ; movq %rbp, %rsp ; popq %rbp diff --git a/cranelift/filetests/filetests/isa/x64/uextend-elision.clif b/cranelift/filetests/filetests/isa/x64/uextend-elision.clif index 1f88ad6538..7ffc50c086 100644 --- a/cranelift/filetests/filetests/isa/x64/uextend-elision.clif +++ b/cranelift/filetests/filetests/isa/x64/uextend-elision.clif @@ -11,8 +11,8 @@ block0(v0: i32, v1: i32): ; pushq %rbp ; movq %rsp, %rbp ; block0: -; addl %edi, %esi, %edi ; movq %rdi, %rax +; addl %eax, %esi, %eax ; movq %rbp, %rsp ; popq %rbp ; ret diff --git a/cranelift/filetests/filetests/isa/x64/umax-bug.clif b/cranelift/filetests/filetests/isa/x64/umax-bug.clif index f0272041c4..63ea9b4c6e 100644 --- a/cranelift/filetests/filetests/isa/x64/umax-bug.clif +++ b/cranelift/filetests/filetests/isa/x64/umax-bug.clif @@ -13,8 +13,8 @@ block0(v1: i32, v2: i64): ; block0: ; movl 0(%rsi), %r8d ; cmpl %edi, %r8d -; cmovnbl %r8d, %edi, %edi ; movq %rdi, %rax +; cmovnbl %r8d, %eax, %eax ; movq %rbp, %rsp ; popq %rbp ; ret diff --git a/cranelift/filetests/filetests/isa/x64/urem.clif b/cranelift/filetests/filetests/isa/x64/urem.clif index b6ba479bae..d89984faba 100644 --- a/cranelift/filetests/filetests/isa/x64/urem.clif +++ b/cranelift/filetests/filetests/isa/x64/urem.clif @@ -10,8 +10,7 @@ block0(v0: i8, v1: i8): ; pushq %rbp ; movq %rsp, %rbp ; block0: -; movzbl %dil, %r9d -; movq %r9, %rax +; movzbl %dil, %eax ; div %al, (none), %sil, %al, (none) ; shrq $8, %rax, %rax ; movq %rbp, %rsp @@ -27,9 +26,8 @@ block0(v0: i16, v1: i16): ; pushq %rbp ; movq %rsp, %rbp ; block0: -; movl $0, %r10d ; movq %rdi, %rax -; movq %r10, %rdx +; movl $0, %edx ; div %ax, %dx, %si, %ax, %dx ; movq %rdx, %rax ; movq %rbp, %rsp @@ -45,9 +43,8 @@ block0(v0: i32, v1: i32): ; pushq %rbp ; movq %rsp, %rbp ; block0: -; movl $0, %r10d ; movq %rdi, %rax -; movq %r10, %rdx +; movl $0, %edx ; div %eax, %edx, %esi, %eax, %edx ; movq %rdx, %rax ; movq %rbp, %rsp @@ -63,9 +60,8 @@ block0(v0: i64, v1: i64): ; pushq %rbp ; movq %rsp, %rbp ; block0: -; movl $0, %r10d ; movq %rdi, %rax -; movq %r10, %rdx +; movl $0, %edx ; div %rax, %rdx, %rsi, %rax, %rdx ; movq %rdx, %rax ; movq %rbp, %rsp diff --git a/cranelift/filetests/filetests/isa/x64/ushr.clif b/cranelift/filetests/filetests/isa/x64/ushr.clif index b7b5551ade..401e6c9265 100644 --- a/cranelift/filetests/filetests/isa/x64/ushr.clif +++ b/cranelift/filetests/filetests/isa/x64/ushr.clif @@ -16,22 +16,24 @@ block0(v0: i128, v1: i8): ; movq %rsp, %rbp ; block0: ; movzbq %dl, %rcx -; shrq %cl, %rdi, %rdi +; movq %rdi, %r10 +; shrq %cl, %r10, %r10 ; movq %rsi, %r8 ; shrq %cl, %r8, %r8 -; movq %rcx, %rax +; movq %rcx, %rdi ; movl $64, %ecx -; movq %rax, %r9 +; movq %rdi, %r9 ; subq %rcx, %r9, %rcx -; shlq %cl, %rsi, %rsi +; movq %rsi, %rdi +; shlq %cl, %rdi, %rdi ; xorq %r11, %r11, %r11 ; testq $127, %r9 -; cmovzq %r11, %rsi, %rsi -; orq %rsi, %rdi, %rsi +; cmovzq %r11, %rdi, %rdi +; orq %rdi, %r10, %rdi ; xorq %rdx, %rdx, %rdx ; testq $64, %r9 ; movq %r8, %rax -; cmovzq %rsi, %rax, %rax +; cmovzq %rdi, %rax, %rax ; cmovzq %r8, %rdx, %rdx ; movq %rbp, %rsp ; popq %rbp @@ -47,7 +49,8 @@ block0(v0: i128, v1: i64): ; movq %rsp, %rbp ; block0: ; movq %rdx, %rcx -; shrq %cl, %rdi, %rdi +; movq %rdi, %r9 +; shrq %cl, %r9, %r9 ; movq %rsi, %r11 ; shrq %cl, %r11, %r11 ; movl $64, %ecx @@ -57,7 +60,7 @@ block0(v0: i128, v1: i64): ; xorq %r10, %r10, %r10 ; testq $127, %r8 ; cmovzq %r10, %rsi, %rsi -; orq %rsi, %rdi, %rsi +; orq %rsi, %r9, %rsi ; xorq %rdx, %rdx, %rdx ; testq $64, %r8 ; movq %r11, %rax @@ -77,7 +80,8 @@ block0(v0: i128, v1: i32): ; movq %rsp, %rbp ; block0: ; movq %rdx, %rcx -; shrq %cl, %rdi, %rdi +; movq %rdi, %r9 +; shrq %cl, %r9, %r9 ; movq %rsi, %r11 ; shrq %cl, %r11, %r11 ; movl $64, %ecx @@ -87,7 +91,7 @@ block0(v0: i128, v1: i32): ; xorq %r10, %r10, %r10 ; testq $127, %r8 ; cmovzq %r10, %rsi, %rsi -; orq %rsi, %rdi, %rsi +; orq %rsi, %r9, %rsi ; xorq %rdx, %rdx, %rdx ; testq $64, %r8 ; movq %r11, %rax @@ -107,7 +111,8 @@ block0(v0: i128, v1: i16): ; movq %rsp, %rbp ; block0: ; movq %rdx, %rcx -; shrq %cl, %rdi, %rdi +; movq %rdi, %r9 +; shrq %cl, %r9, %r9 ; movq %rsi, %r11 ; shrq %cl, %r11, %r11 ; movl $64, %ecx @@ -117,7 +122,7 @@ block0(v0: i128, v1: i16): ; xorq %r10, %r10, %r10 ; testq $127, %r8 ; cmovzq %r10, %rsi, %rsi -; orq %rsi, %rdi, %rsi +; orq %rsi, %r9, %rsi ; xorq %rdx, %rdx, %rdx ; testq $64, %r8 ; movq %r11, %rax @@ -137,7 +142,8 @@ block0(v0: i128, v1: i8): ; movq %rsp, %rbp ; block0: ; movq %rdx, %rcx -; shrq %cl, %rdi, %rdi +; movq %rdi, %r9 +; shrq %cl, %r9, %r9 ; movq %rsi, %r11 ; shrq %cl, %r11, %r11 ; movl $64, %ecx @@ -147,7 +153,7 @@ block0(v0: i128, v1: i8): ; xorq %r10, %r10, %r10 ; testq $127, %r8 ; cmovzq %r10, %rsi, %rsi -; orq %rsi, %rdi, %rsi +; orq %rsi, %r9, %rsi ; xorq %rdx, %rdx, %rdx ; testq $64, %r8 ; movq %r11, %rax @@ -167,8 +173,8 @@ block0(v0: i64, v1: i128): ; movq %rsp, %rbp ; block0: ; movq %rsi, %rcx -; shrq %cl, %rdi, %rdi ; movq %rdi, %rax +; shrq %cl, %rax, %rax ; movq %rbp, %rsp ; popq %rbp ; ret @@ -184,8 +190,8 @@ block0(v0: i32, v1: i64, v2: i64): ; movq %rsp, %rbp ; block0: ; movq %rsi, %rcx -; shrl %cl, %edi, %edi ; movq %rdi, %rax +; shrl %cl, %eax, %eax ; movq %rbp, %rsp ; popq %rbp ; ret @@ -201,8 +207,8 @@ block0(v0: i16, v1: i128): ; block0: ; movq %rsi, %rcx ; andq %rcx, $15, %rcx -; shrw %cl, %di, %di ; movq %rdi, %rax +; shrw %cl, %ax, %ax ; movq %rbp, %rsp ; popq %rbp ; ret @@ -218,8 +224,8 @@ block0(v0: i8, v1: i128): ; block0: ; movq %rsi, %rcx ; andq %rcx, $7, %rcx -; shrb %cl, %dil, %dil ; movq %rdi, %rax +; shrb %cl, %al, %al ; movq %rbp, %rsp ; popq %rbp ; ret @@ -234,8 +240,8 @@ block0(v0: i64, v1: i64): ; movq %rsp, %rbp ; block0: ; movq %rsi, %rcx -; shrq %cl, %rdi, %rdi ; movq %rdi, %rax +; shrq %cl, %rax, %rax ; movq %rbp, %rsp ; popq %rbp ; ret @@ -250,8 +256,8 @@ block0(v0: i64, v1: i32): ; movq %rsp, %rbp ; block0: ; movq %rsi, %rcx -; shrq %cl, %rdi, %rdi ; movq %rdi, %rax +; shrq %cl, %rax, %rax ; movq %rbp, %rsp ; popq %rbp ; ret @@ -266,8 +272,8 @@ block0(v0: i64, v1: i16): ; movq %rsp, %rbp ; block0: ; movq %rsi, %rcx -; shrq %cl, %rdi, %rdi ; movq %rdi, %rax +; shrq %cl, %rax, %rax ; movq %rbp, %rsp ; popq %rbp ; ret @@ -282,8 +288,8 @@ block0(v0: i64, v1: i8): ; movq %rsp, %rbp ; block0: ; movq %rsi, %rcx -; shrq %cl, %rdi, %rdi ; movq %rdi, %rax +; shrq %cl, %rax, %rax ; movq %rbp, %rsp ; popq %rbp ; ret @@ -298,8 +304,8 @@ block0(v0: i32, v1: i64): ; movq %rsp, %rbp ; block0: ; movq %rsi, %rcx -; shrl %cl, %edi, %edi ; movq %rdi, %rax +; shrl %cl, %eax, %eax ; movq %rbp, %rsp ; popq %rbp ; ret @@ -314,8 +320,8 @@ block0(v0: i32, v1: i32): ; movq %rsp, %rbp ; block0: ; movq %rsi, %rcx -; shrl %cl, %edi, %edi ; movq %rdi, %rax +; shrl %cl, %eax, %eax ; movq %rbp, %rsp ; popq %rbp ; ret @@ -330,8 +336,8 @@ block0(v0: i32, v1: i16): ; movq %rsp, %rbp ; block0: ; movq %rsi, %rcx -; shrl %cl, %edi, %edi ; movq %rdi, %rax +; shrl %cl, %eax, %eax ; movq %rbp, %rsp ; popq %rbp ; ret @@ -346,8 +352,8 @@ block0(v0: i32, v1: i8): ; movq %rsp, %rbp ; block0: ; movq %rsi, %rcx -; shrl %cl, %edi, %edi ; movq %rdi, %rax +; shrl %cl, %eax, %eax ; movq %rbp, %rsp ; popq %rbp ; ret @@ -363,8 +369,8 @@ block0(v0: i16, v1: i64): ; block0: ; movq %rsi, %rcx ; andq %rcx, $15, %rcx -; shrw %cl, %di, %di ; movq %rdi, %rax +; shrw %cl, %ax, %ax ; movq %rbp, %rsp ; popq %rbp ; ret @@ -380,8 +386,8 @@ block0(v0: i16, v1: i32): ; block0: ; movq %rsi, %rcx ; andq %rcx, $15, %rcx -; shrw %cl, %di, %di ; movq %rdi, %rax +; shrw %cl, %ax, %ax ; movq %rbp, %rsp ; popq %rbp ; ret @@ -397,8 +403,8 @@ block0(v0: i16, v1: i16): ; block0: ; movq %rsi, %rcx ; andq %rcx, $15, %rcx -; shrw %cl, %di, %di ; movq %rdi, %rax +; shrw %cl, %ax, %ax ; movq %rbp, %rsp ; popq %rbp ; ret @@ -414,8 +420,8 @@ block0(v0: i16, v1: i8): ; block0: ; movq %rsi, %rcx ; andq %rcx, $15, %rcx -; shrw %cl, %di, %di ; movq %rdi, %rax +; shrw %cl, %ax, %ax ; movq %rbp, %rsp ; popq %rbp ; ret @@ -431,8 +437,8 @@ block0(v0: i8, v1: i64): ; block0: ; movq %rsi, %rcx ; andq %rcx, $7, %rcx -; shrb %cl, %dil, %dil ; movq %rdi, %rax +; shrb %cl, %al, %al ; movq %rbp, %rsp ; popq %rbp ; ret @@ -448,8 +454,8 @@ block0(v0: i8, v1: i32): ; block0: ; movq %rsi, %rcx ; andq %rcx, $7, %rcx -; shrb %cl, %dil, %dil ; movq %rdi, %rax +; shrb %cl, %al, %al ; movq %rbp, %rsp ; popq %rbp ; ret @@ -465,8 +471,8 @@ block0(v0: i8, v1: i16): ; block0: ; movq %rsi, %rcx ; andq %rcx, $7, %rcx -; shrb %cl, %dil, %dil ; movq %rdi, %rax +; shrb %cl, %al, %al ; movq %rbp, %rsp ; popq %rbp ; ret @@ -482,14 +488,12 @@ block0(v0: i8, v1: i8): ; block0: ; movq %rsi, %rcx ; andq %rcx, $7, %rcx -; shrb %cl, %dil, %dil ; movq %rdi, %rax +; shrb %cl, %al, %al ; movq %rbp, %rsp ; popq %rbp ; ret - - function %ushr_i64_const(i64) -> i64 { block0(v0: i64): v1 = ushr_imm.i64 v0, 65 @@ -499,8 +503,8 @@ block0(v0: i64): ; pushq %rbp ; movq %rsp, %rbp ; block0: -; shrq $1, %rdi, %rdi ; movq %rdi, %rax +; shrq $1, %rax, %rax ; movq %rbp, %rsp ; popq %rbp ; ret @@ -514,8 +518,8 @@ block0(v0: i32): ; pushq %rbp ; movq %rsp, %rbp ; block0: -; shrl $1, %edi, %edi ; movq %rdi, %rax +; shrl $1, %eax, %eax ; movq %rbp, %rsp ; popq %rbp ; ret @@ -529,8 +533,8 @@ block0(v0: i16): ; pushq %rbp ; movq %rsp, %rbp ; block0: -; shrw $1, %di, %di ; movq %rdi, %rax +; shrw $1, %ax, %ax ; movq %rbp, %rsp ; popq %rbp ; ret @@ -544,8 +548,8 @@ block0(v0: i8): ; pushq %rbp ; movq %rsp, %rbp ; block0: -; shrb $1, %dil, %dil ; movq %rdi, %rax +; shrb $1, %al, %al ; movq %rbp, %rsp ; popq %rbp ; ret diff --git a/cranelift/filetests/filetests/isa/x64/uunarrow.clif b/cranelift/filetests/filetests/isa/x64/uunarrow.clif index 6b3a1bcce9..facc115bd1 100644 --- a/cranelift/filetests/filetests/isa/x64/uunarrow.clif +++ b/cranelift/filetests/filetests/isa/x64/uunarrow.clif @@ -13,10 +13,11 @@ block0(v0: f64x2): ; movq %rsp, %rbp ; block0: ; xorpd %xmm3, %xmm3, %xmm3 -; maxpd %xmm0, %xmm3, %xmm0 -; movupd const(0), %xmm7 -; minpd %xmm0, %xmm7, %xmm0 -; roundpd $3, %xmm0, %xmm0 +; movdqa %xmm0, %xmm7 +; maxpd %xmm7, %xmm3, %xmm7 +; movupd const(0), %xmm8 +; minpd %xmm7, %xmm8, %xmm7 +; roundpd $3, %xmm7, %xmm0 ; movupd const(1), %xmm13 ; addpd %xmm0, %xmm13, %xmm0 ; shufps $136, %xmm0, %xmm3, %xmm0 diff --git a/cranelift/filetests/filetests/isa/x64/vhigh_bits.clif b/cranelift/filetests/filetests/isa/x64/vhigh_bits.clif index 185cb62764..538d6ddcd8 100644 --- a/cranelift/filetests/filetests/isa/x64/vhigh_bits.clif +++ b/cranelift/filetests/filetests/isa/x64/vhigh_bits.clif @@ -38,8 +38,9 @@ block0(v0: i16x8): ; pushq %rbp ; movq %rsp, %rbp ; block0: -; packsswb %xmm0, %xmm0, %xmm0 -; pmovmskb %xmm0, %eax +; movdqa %xmm0, %xmm3 +; packsswb %xmm3, %xmm0, %xmm3 +; pmovmskb %xmm3, %eax ; shrq $8, %rax, %rax ; movq %rbp, %rsp ; popq %rbp diff --git a/cranelift/filetests/filetests/isa/x64/widening.clif b/cranelift/filetests/filetests/isa/x64/widening.clif index 202a6d4389..9157cbff01 100644 --- a/cranelift/filetests/filetests/isa/x64/widening.clif +++ b/cranelift/filetests/filetests/isa/x64/widening.clif @@ -52,8 +52,9 @@ block0(v0: i8x16): ; pushq %rbp ; movq %rsp, %rbp ; block0: -; palignr $8, %xmm0, %xmm0, %xmm0 -; pmovsxbw %xmm0, %xmm0 +; movdqa %xmm0, %xmm3 +; palignr $8, %xmm3, %xmm0, %xmm3 +; pmovsxbw %xmm3, %xmm0 ; movq %rbp, %rsp ; popq %rbp ; ret @@ -67,8 +68,9 @@ block0(v0: i16x8): ; pushq %rbp ; movq %rsp, %rbp ; block0: -; palignr $8, %xmm0, %xmm0, %xmm0 -; pmovsxwd %xmm0, %xmm0 +; movdqa %xmm0, %xmm3 +; palignr $8, %xmm3, %xmm0, %xmm3 +; pmovsxwd %xmm3, %xmm0 ; movq %rbp, %rsp ; popq %rbp ; ret @@ -139,8 +141,9 @@ block0(v0: i8x16): ; pushq %rbp ; movq %rsp, %rbp ; block0: -; palignr $8, %xmm0, %xmm0, %xmm0 -; pmovzxbw %xmm0, %xmm0 +; movdqa %xmm0, %xmm3 +; palignr $8, %xmm3, %xmm0, %xmm3 +; pmovzxbw %xmm3, %xmm0 ; movq %rbp, %rsp ; popq %rbp ; ret @@ -154,8 +157,9 @@ block0(v0: i16x8): ; pushq %rbp ; movq %rsp, %rbp ; block0: -; palignr $8, %xmm0, %xmm0, %xmm0 -; pmovzxwd %xmm0, %xmm0 +; movdqa %xmm0, %xmm3 +; palignr $8, %xmm3, %xmm0, %xmm3 +; pmovzxwd %xmm3, %xmm0 ; movq %rbp, %rsp ; popq %rbp ; ret diff --git a/supply-chain/audits.toml b/supply-chain/audits.toml index 6a47bcbf31..a67e3b3e37 100644 --- a/supply-chain/audits.toml +++ b/supply-chain/audits.toml @@ -170,6 +170,12 @@ criteria = "safe-to-deploy" delta = "0.3.1 -> 0.3.2" notes = "The Bytecode Alliance is the author of this crate." +[[audits.regalloc2]] +who = "Chris Fallin " +criteria = "safe-to-deploy" +delta = "0.3.2 -> 0.4.0" +notes = "The Bytecode Alliance is the author of this crate." + [[audits.rustc-demangle]] who = "Alex Crichton " criteria = "safe-to-deploy"