From 05cbd667c7e89828a45ebc5760787a6160c55c8d Mon Sep 17 00:00:00 2001 From: Chris Fallin Date: Tue, 20 Sep 2022 18:17:04 -0700 Subject: [PATCH] Cranelift: use regalloc2 constraints on caller side of ABI code. (#4892) * Cranelift: use regalloc2 constraints on caller side of ABI code. This PR updates the shared ABI code and backends to use register-operand constraints rather than explicit pinned-vreg moves for register arguments and return values. The s390x backend was not updated, because it has its own implementation of ABI code. Ideally we could converge back to the code shared by x64 and aarch64 (which didn't exist when s390x ported calls to ISLE, so the current situation is understandable, to be clear!). I'll leave this for future work. This PR exposed several places where regalloc2 needed to be a bit more flexible with constraints; it requires regalloc2#74 to be merged and pulled in. * Update to regalloc2 0.3.3. In addition to version bump, this required removing two asserts as `SpillSlot`s no longer carry their class (so we can't assert that they have the correct class). * Review comments. * Filetest updates. * Add cargo-vet audit for regalloc2 0.3.2 -> 0.3.3 upgrade. * Update to regalloc2 0.4.0. 
--- Cargo.lock | 4 +- cranelift/codegen/Cargo.toml | 2 +- cranelift/codegen/src/isa/aarch64/abi.rs | 25 +- cranelift/codegen/src/isa/aarch64/inst/mod.rs | 24 +- cranelift/codegen/src/isa/s390x/abi.rs | 6 +- cranelift/codegen/src/isa/x64/abi.rs | 44 ++- cranelift/codegen/src/isa/x64/inst/mod.rs | 41 ++- cranelift/codegen/src/isa/x64/lower.rs | 15 +- cranelift/codegen/src/machinst/abi.rs | 242 +++++++++---- cranelift/codegen/src/machinst/isle.rs | 23 +- cranelift/codegen/src/machinst/vcode.rs | 2 - .../filetests/isa/aarch64/amodes.clif | 69 ++-- .../filetests/filetests/isa/aarch64/bti.clif | 1 + .../filetests/filetests/isa/aarch64/call.clif | 156 ++++---- .../filetests/isa/aarch64/floating-point.clif | 15 +- .../filetests/isa/aarch64/reftypes.clif | 18 +- .../filetests/isa/aarch64/simd-narrow.clif | 30 +- .../filetests/isa/aarch64/tls-elf-gd.clif | 3 +- .../isa/s390x/atomic_cas-little.clif | 18 +- .../filetests/isa/s390x/atomic_cas.clif | 21 +- .../isa/s390x/atomic_rmw-arch13.clif | 58 +-- .../isa/s390x/atomic_rmw-little.clif | 336 ++++++++++-------- .../filetests/isa/s390x/atomic_rmw.clif | 326 +++++++++-------- .../filetests/filetests/isa/s390x/bitops.clif | 21 +- .../filetests/isa/s390x/bitwise.clif | 15 +- .../filetests/filetests/isa/s390x/call.clif | 6 +- .../filetests/isa/s390x/shift-rotate.clif | 42 ++- .../filetests/isa/x64/amode-opt.clif | 5 +- cranelift/filetests/filetests/isa/x64/b1.clif | 26 +- .../filetests/filetests/isa/x64/basic.clif | 2 +- .../filetests/isa/x64/call-conv.clif | 113 +++--- .../filetests/isa/x64/ceil-libcall.clif | 8 +- .../filetests/isa/x64/cmp-mem-bug.clif | 2 +- .../filetests/isa/x64/div-checks.clif | 12 +- .../filetests/filetests/isa/x64/fastcall.clif | 21 +- .../filetests/filetests/isa/x64/fcvt.clif | 25 +- .../filetests/isa/x64/floor-libcall.clif | 8 +- .../filetests/filetests/isa/x64/fma-call.clif | 8 +- .../filetests/filetests/isa/x64/i128.clif | 243 +++++++------ .../filetests/filetests/isa/x64/ishl.clif | 104 +++--- 
.../filetests/filetests/isa/x64/load-op.clif | 11 +- .../filetests/isa/x64/narrowing.clif | 5 +- .../filetests/isa/x64/nearest-libcall.clif | 8 +- .../filetests/filetests/isa/x64/popcnt.clif | 22 +- .../filetests/filetests/isa/x64/sdiv.clif | 4 - .../filetests/isa/x64/select-i128.clif | 15 +- .../isa/x64/simd-bitwise-compile.clif | 45 +-- .../isa/x64/simd-logical-compile.clif | 5 +- .../filetests/filetests/isa/x64/srem.clif | 12 +- .../filetests/filetests/isa/x64/sshr.clif | 170 ++++----- .../filetests/isa/x64/struct-arg.clif | 40 +-- .../filetests/isa/x64/struct-ret.clif | 11 +- .../filetests/filetests/isa/x64/traps.clif | 3 +- .../filetests/isa/x64/trunc-libcall.clif | 8 +- .../filetests/filetests/isa/x64/udiv.clif | 12 +- .../filetests/isa/x64/uextend-elision.clif | 2 +- .../filetests/filetests/isa/x64/umax-bug.clif | 2 +- .../filetests/filetests/isa/x64/urem.clif | 12 +- .../filetests/filetests/isa/x64/ushr.clif | 86 ++--- .../filetests/filetests/isa/x64/uunarrow.clif | 9 +- .../filetests/isa/x64/vhigh_bits.clif | 5 +- .../filetests/filetests/isa/x64/widening.clif | 20 +- supply-chain/audits.toml | 6 + 63 files changed, 1476 insertions(+), 1177 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 6fb857c6cd..38b60d3d34 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2388,9 +2388,9 @@ dependencies = [ [[package]] name = "regalloc2" -version = "0.3.2" +version = "0.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d43a209257d978ef079f3d446331d0f1794f5e0fc19b306a199983857833a779" +checksum = "91ffba626f895ce5b8b97614bafa3fd59623490fe82f0fa8046dba6664a37b51" dependencies = [ "fxhash", "log", diff --git a/cranelift/codegen/Cargo.toml b/cranelift/codegen/Cargo.toml index 81e4d1294f..91cdcbd307 100644 --- a/cranelift/codegen/Cargo.toml +++ b/cranelift/codegen/Cargo.toml @@ -25,7 +25,7 @@ serde = { version = "1.0.94", features = ["derive"], optional = true } bincode = { version = "1.2.1", optional = true } gimli = { version = "0.26.0", 
default-features = false, features = ["write"], optional = true } smallvec = { version = "1.6.1" } -regalloc2 = { version = "0.3.2", features = ["checker"] } +regalloc2 = { version = "0.4.0", features = ["checker"] } souper-ir = { version = "2.1.0", optional = true } sha2 = { version = "0.9.0", optional = true } # It is a goal of the cranelift-codegen crate to have minimal external dependencies. diff --git a/cranelift/codegen/src/isa/aarch64/abi.rs b/cranelift/codegen/src/isa/aarch64/abi.rs index 2602d80953..6bd8f946c0 100644 --- a/cranelift/codegen/src/isa/aarch64/abi.rs +++ b/cranelift/codegen/src/isa/aarch64/abi.rs @@ -919,8 +919,8 @@ impl ABIMachineSpec for AArch64MachineDeps { fn gen_call( dest: &CallDest, - uses: SmallVec<[Reg; 8]>, - defs: SmallVec<[Writable; 8]>, + uses: CallArgList, + defs: CallRetList, clobbers: PRegSet, opcode: ir::Opcode, tmp: Writable, @@ -978,19 +978,32 @@ impl ABIMachineSpec for AArch64MachineDeps { call_conv: isa::CallConv, dst: Reg, src: Reg, + tmp: Writable, + _tmp2: Writable, size: usize, ) -> SmallVec<[Self::I; 8]> { let mut insts = SmallVec::new(); let arg0 = writable_xreg(0); let arg1 = writable_xreg(1); let arg2 = writable_xreg(2); - insts.push(Inst::gen_move(arg0, dst, I64)); - insts.push(Inst::gen_move(arg1, src, I64)); - insts.extend(Inst::load_constant(arg2, size as u64).into_iter()); + insts.extend(Inst::load_constant(tmp, size as u64).into_iter()); insts.push(Inst::Call { info: Box::new(CallInfo { dest: ExternalName::LibCall(LibCall::Memcpy), - uses: smallvec![arg0.to_reg(), arg1.to_reg(), arg2.to_reg()], + uses: smallvec![ + CallArgPair { + vreg: dst, + preg: arg0.to_reg() + }, + CallArgPair { + vreg: src, + preg: arg1.to_reg() + }, + CallArgPair { + vreg: tmp.to_reg(), + preg: arg2.to_reg() + } + ], defs: smallvec![], clobbers: Self::get_regs_clobbered_by_call(call_conv), opcode: Opcode::Call, diff --git a/cranelift/codegen/src/isa/aarch64/inst/mod.rs b/cranelift/codegen/src/isa/aarch64/inst/mod.rs index 
34eb4f1aa4..744a581338 100644 --- a/cranelift/codegen/src/isa/aarch64/inst/mod.rs +++ b/cranelift/codegen/src/isa/aarch64/inst/mod.rs @@ -78,8 +78,8 @@ impl BitOp { #[derive(Clone, Debug)] pub struct CallInfo { pub dest: ExternalName, - pub uses: SmallVec<[Reg; 8]>, - pub defs: SmallVec<[Writable; 8]>, + pub uses: CallArgList, + pub defs: CallRetList, pub clobbers: PRegSet, pub opcode: Opcode, pub caller_callconv: CallConv, @@ -91,8 +91,8 @@ pub struct CallInfo { #[derive(Clone, Debug)] pub struct CallIndInfo { pub rn: Reg, - pub uses: SmallVec<[Reg; 8]>, - pub defs: SmallVec<[Writable; 8]>, + pub uses: SmallVec<[CallArgPair; 8]>, + pub defs: SmallVec<[CallRetPair; 8]>, pub clobbers: PRegSet, pub opcode: Opcode, pub caller_callconv: CallConv, @@ -1027,14 +1027,22 @@ fn aarch64_get_operands VReg>(inst: &Inst, collector: &mut Operan } &Inst::Jump { .. } => {} &Inst::Call { ref info, .. } => { - collector.reg_uses(&info.uses[..]); - collector.reg_defs(&info.defs[..]); + for u in &info.uses { + collector.reg_fixed_use(u.vreg, u.preg); + } + for d in &info.defs { + collector.reg_fixed_def(d.vreg, d.preg); + } collector.reg_clobbers(info.clobbers); } &Inst::CallInd { ref info, .. } => { collector.reg_use(info.rn); - collector.reg_uses(&info.uses[..]); - collector.reg_defs(&info.defs[..]); + for u in &info.uses { + collector.reg_fixed_use(u.vreg, u.preg); + } + for d in &info.defs { + collector.reg_fixed_def(d.vreg, d.preg); + } collector.reg_clobbers(info.clobbers); } &Inst::CondBr { ref kind, .. 
} => match kind { diff --git a/cranelift/codegen/src/isa/s390x/abi.rs b/cranelift/codegen/src/isa/s390x/abi.rs index 7a13a17f9d..c77ed2f905 100644 --- a/cranelift/codegen/src/isa/s390x/abi.rs +++ b/cranelift/codegen/src/isa/s390x/abi.rs @@ -743,8 +743,8 @@ impl ABIMachineSpec for S390xMachineDeps { fn gen_call( _dest: &CallDest, - _uses: SmallVec<[Reg; 8]>, - _defs: SmallVec<[Writable; 8]>, + _uses: CallArgList, + _defs: CallRetList, _clobbers: PRegSet, _opcode: ir::Opcode, _tmp: Writable, @@ -758,6 +758,8 @@ impl ABIMachineSpec for S390xMachineDeps { _call_conv: isa::CallConv, _dst: Reg, _src: Reg, + _tmp1: Writable, + _tmp2: Writable, _size: usize, ) -> SmallVec<[Self::I; 8]> { unimplemented!("StructArgs not implemented for S390X yet"); diff --git a/cranelift/codegen/src/isa/x64/abi.rs b/cranelift/codegen/src/isa/x64/abi.rs index efd235cdeb..7911be775d 100644 --- a/cranelift/codegen/src/isa/x64/abi.rs +++ b/cranelift/codegen/src/isa/x64/abi.rs @@ -429,7 +429,9 @@ impl ABIMachineSpec for X64ABIMachineSpec { insts.push(Inst::CallKnown { dest: ExternalName::LibCall(LibCall::Probestack), info: Box::new(CallInfo { - uses: smallvec![regs::rax()], + // No need to include arg here: we are post-regalloc + // so no constraints will be seen anyway. + uses: smallvec![], defs: smallvec![], clobbers: PRegSet::empty(), opcode: Opcode::Call, @@ -584,8 +586,8 @@ impl ABIMachineSpec for X64ABIMachineSpec { /// Generate a call instruction/sequence. 
fn gen_call( dest: &CallDest, - uses: SmallVec<[Reg; 8]>, - defs: SmallVec<[Writable; 8]>, + uses: CallArgList, + defs: CallRetList, clobbers: PRegSet, opcode: ir::Opcode, tmp: Writable, @@ -628,39 +630,47 @@ impl ABIMachineSpec for X64ABIMachineSpec { call_conv: isa::CallConv, dst: Reg, src: Reg, + temp: Writable, + temp2: Writable, size: usize, ) -> SmallVec<[Self::I; 8]> { let mut insts = SmallVec::new(); let arg0 = get_intreg_for_arg(&call_conv, 0, 0).unwrap(); let arg1 = get_intreg_for_arg(&call_conv, 1, 1).unwrap(); let arg2 = get_intreg_for_arg(&call_conv, 2, 2).unwrap(); - // We need a register to load the address of `memcpy()` below and we - // don't have a lowering context to allocate a temp here; so just use a - // register we know we are free to mutate as part of this sequence - // (because it is clobbered by the call as per the ABI anyway). - let memcpy_addr = get_intreg_for_arg(&call_conv, 3, 3).unwrap(); insts.push(Inst::gen_move(Writable::from_reg(arg0), dst, I64)); insts.push(Inst::gen_move(Writable::from_reg(arg1), src, I64)); insts.extend( - Inst::gen_constant( - ValueRegs::one(Writable::from_reg(arg2)), - size as u128, - I64, - |_| panic!("tmp should not be needed"), - ) + Inst::gen_constant(ValueRegs::one(temp), size as u128, I64, |_| { + panic!("tmp should not be needed") + }) .into_iter(), ); // We use an indirect call and a full LoadExtName because we do not have // information about the libcall `RelocDistance` here, so we // conservatively use the more flexible calling sequence. 
insts.push(Inst::LoadExtName { - dst: Writable::from_reg(memcpy_addr), + dst: temp2, name: Box::new(ExternalName::LibCall(LibCall::Memcpy)), offset: 0, }); insts.push(Inst::call_unknown( - RegMem::reg(memcpy_addr), - /* uses = */ smallvec![arg0, arg1, arg2], + RegMem::reg(temp2.to_reg()), + /* uses = */ + smallvec![ + CallArgPair { + vreg: dst, + preg: arg0 + }, + CallArgPair { + vreg: src, + preg: arg1 + }, + CallArgPair { + vreg: temp.to_reg(), + preg: arg2 + }, + ], /* defs = */ smallvec![], /* clobbers = */ Self::get_regs_clobbered_by_call(call_conv), Opcode::Call, diff --git a/cranelift/codegen/src/isa/x64/inst/mod.rs b/cranelift/codegen/src/isa/x64/inst/mod.rs index 9f67a7ef3d..ae92b7307e 100644 --- a/cranelift/codegen/src/isa/x64/inst/mod.rs +++ b/cranelift/codegen/src/isa/x64/inst/mod.rs @@ -1,7 +1,7 @@ //! This module defines x86_64-specific machine instruction types. use crate::binemit::{Addend, CodeOffset, Reloc, StackMap}; -use crate::ir::{types, ExternalName, Opcode, RelSourceLoc, TrapCode, Type}; +use crate::ir::{types, ExternalName, LibCall, Opcode, RelSourceLoc, TrapCode, Type}; use crate::isa::x64::abi::X64ABIMachineSpec; use crate::isa::x64::inst::regs::pretty_print_reg; use crate::isa::x64::settings as x64_settings; @@ -34,9 +34,9 @@ pub use super::lower::isle::generated_code::MInst as Inst; #[derive(Clone, Debug)] pub struct CallInfo { /// Register uses of this call. - pub uses: SmallVec<[Reg; 8]>, + pub uses: CallArgList, /// Register defs of this call. - pub defs: SmallVec<[Writable; 8]>, + pub defs: CallRetList, /// Registers clobbered by this call, as per its calling convention. pub clobbers: PRegSet, /// The opcode of this call. 
@@ -490,8 +490,8 @@ impl Inst { pub(crate) fn call_known( dest: ExternalName, - uses: SmallVec<[Reg; 8]>, - defs: SmallVec<[Writable; 8]>, + uses: CallArgList, + defs: CallRetList, clobbers: PRegSet, opcode: Opcode, ) -> Inst { @@ -508,8 +508,8 @@ impl Inst { pub(crate) fn call_unknown( dest: RegMem, - uses: SmallVec<[Reg; 8]>, - defs: SmallVec<[Writable; 8]>, + uses: CallArgList, + defs: CallRetList, clobbers: PRegSet, opcode: Opcode, ) -> Inst { @@ -1446,7 +1446,9 @@ impl PrettyPrint for Inst { format!("{} {}", ljustify("popq".to_string()), dst) } - Inst::CallKnown { dest, .. } => format!("{} {:?}", ljustify("call".to_string()), dest), + Inst::CallKnown { dest, .. } => { + format!("{} {:?}", ljustify("call".to_string()), dest) + } Inst::CallUnknown { dest, .. } => { let dest = dest.pretty_print(8, allocs); @@ -1981,23 +1983,28 @@ fn x64_get_operands VReg>(inst: &Inst, collector: &mut OperandCol collector.reg_early_def(*tmp); } - Inst::CallKnown { ref info, .. } => { - for &u in &info.uses { - collector.reg_use(u); + Inst::CallKnown { dest, ref info, .. } => { + // Probestack is special and is only inserted after + // regalloc, so we do not need to represent its ABI to the + // register allocator. Assert that we don't alter that + // arrangement. + debug_assert_ne!(*dest, ExternalName::LibCall(LibCall::Probestack)); + for u in &info.uses { + collector.reg_fixed_use(u.vreg, u.preg); } - for &d in &info.defs { - collector.reg_def(d); + for d in &info.defs { + collector.reg_fixed_def(d.vreg, d.preg); } collector.reg_clobbers(info.clobbers); } Inst::CallUnknown { ref info, dest, .. 
} => { dest.get_operands(collector); - for &u in &info.uses { - collector.reg_use(u); + for u in &info.uses { + collector.reg_fixed_use(u.vreg, u.preg); } - for &d in &info.defs { - collector.reg_def(d); + for d in &info.defs { + collector.reg_fixed_def(d.vreg, d.preg); } collector.reg_clobbers(info.clobbers); } diff --git a/cranelift/codegen/src/isa/x64/lower.rs b/cranelift/codegen/src/isa/x64/lower.rs index 1f151800b3..836b2c0056 100644 --- a/cranelift/codegen/src/isa/x64/lower.rs +++ b/cranelift/codegen/src/isa/x64/lower.rs @@ -8,11 +8,12 @@ use crate::isa::x64::abi::*; use crate::isa::x64::inst::args::*; use crate::isa::x64::inst::*; use crate::isa::{x64::settings as x64_settings, x64::X64Backend, CallConv}; +use crate::machinst::abi::SmallInstVec; use crate::machinst::lower::*; use crate::machinst::*; use crate::result::CodegenResult; use crate::settings::Flags; -use smallvec::SmallVec; +use smallvec::{smallvec, SmallVec}; use target_lexicon::Triple; //============================================================================= @@ -168,16 +169,18 @@ fn emit_vm_call( assert_eq!(inputs.len(), abi.num_args(ctx.sigs())); for (i, input) in inputs.iter().enumerate() { - for inst in abi.gen_copy_regs_to_arg(ctx, i, ValueRegs::one(*input)) { + for inst in abi.gen_arg(ctx, i, ValueRegs::one(*input)) { ctx.emit(inst); } } - abi.emit_call(ctx); + let mut retval_insts: SmallInstVec<_> = smallvec![]; for (i, output) in outputs.iter().enumerate() { - for inst in abi.gen_copy_retval_to_regs(ctx, i, ValueRegs::one(*output)) { - ctx.emit(inst); - } + retval_insts.extend(abi.gen_retval(ctx, i, ValueRegs::one(*output)).into_iter()); + } + abi.emit_call(ctx); + for inst in retval_insts { + ctx.emit(inst); } abi.emit_stack_post_adjust(ctx); diff --git a/cranelift/codegen/src/machinst/abi.rs b/cranelift/codegen/src/machinst/abi.rs index a7e7c3c204..bbff4abeca 100644 --- a/cranelift/codegen/src/machinst/abi.rs +++ b/cranelift/codegen/src/machinst/abi.rs @@ -506,8 +506,8 @@ pub 
trait ABIMachineSpec { /// temporary register to use to synthesize the called address, if needed. fn gen_call( dest: &CallDest, - uses: SmallVec<[Reg; 8]>, - defs: SmallVec<[Writable; 8]>, + uses: CallArgList, + defs: CallRetList, clobbers: PRegSet, opcode: ir::Opcode, tmp: Writable, @@ -515,13 +515,16 @@ pub trait ABIMachineSpec { callee_conv: isa::CallConv, ) -> SmallVec<[Self::I; 2]>; - /// Generate a memcpy invocation. Used to set up struct args. May clobber - /// caller-save registers; we only memcpy before we start to set up args for - /// a call. + /// Generate a memcpy invocation. Used to set up struct + /// args. Takes `src`, `dst` as read-only inputs and requires two + /// temporaries to generate the call (for the size immediate and + /// possibly for the address of `memcpy` itself). fn gen_memcpy( call_conv: isa::CallConv, dst: Reg, src: Reg, + tmp1: Writable, + tmp2: Writable, size: usize, ) -> SmallVec<[Self::I; 8]>; @@ -623,6 +626,9 @@ impl SigData { /// Return all uses (i.e, function args), defs (i.e., return values /// and caller-saved registers), and clobbers for the callsite. + /// + /// FIXME: used only by s390x; remove once that backend moves to + /// `call_clobbers` and constraint-based calls. pub fn call_uses_defs_clobbers( &self, ) -> (SmallVec<[Reg; 8]>, SmallVec<[Writable; 8]>, PRegSet) { @@ -682,6 +688,30 @@ impl SigData { (uses, defs, clobbers) } + /// Return all clobbers for the callsite. + pub fn call_clobbers(&self) -> PRegSet { + // Get clobbers: all caller-saves. These may include return value + // regs, which we will remove from the clobber set below. + let mut clobbers = M::get_regs_clobbered_by_call(self.call_conv); + + // Remove retval regs from clobbers. + for ret in &self.rets { + if let &ABIArg::Slots { ref slots, .. } = ret { + for slot in slots { + match slot { + &ABIArgSlot::Reg { reg, .. 
} => { + log::trace!("call_clobbers: retval reg {:?}", reg); + clobbers.remove(PReg::from(reg)); + } + _ => {} + } + } + } + } + + clobbers + } + /// Get the number of arguments expected. pub fn num_args(&self) -> usize { if self.stack_ret_arg.is_some() { @@ -1848,14 +1878,38 @@ impl Callee { } } +/// An input argument to a call instruction: the vreg that is used, +/// and the preg it is constrained to (per the ABI). +#[derive(Clone, Debug)] +pub struct CallArgPair { + /// The virtual register to use for the argument. + pub vreg: Reg, + /// The real register into which the arg goes. + pub preg: Reg, +} + +/// An output return value from a call instruction: the vreg that is +/// defined, and the preg it is constrained to (per the ABI). +#[derive(Clone, Debug)] +pub struct CallRetPair { + /// The virtual register to define from this return value. + pub vreg: Writable, + /// The real register from which the return value is read. + pub preg: Reg, +} + +pub type CallArgList = SmallVec<[CallArgPair; 8]>; +pub type CallRetList = SmallVec<[CallRetPair; 8]>; + /// ABI object for a callsite. pub struct Caller { /// The called function's signature. sig: Sig, - /// All uses for the callsite, i.e., function args. - uses: SmallVec<[Reg; 8]>, + /// All register uses for the callsite, i.e., function args, with + /// VReg and the physical register it is constrained to. + uses: CallArgList, /// All defs for the callsite, i.e., return values. - defs: SmallVec<[Writable; 8]>, + defs: CallRetList, /// Caller-save clobbers. clobbers: PRegSet, /// Call destination. 
@@ -1890,11 +1944,11 @@ impl Caller { flags: settings::Flags, ) -> CodegenResult> { let sig = sigs.abi_sig_for_sig_ref(sig_ref); - let (uses, defs, clobbers) = sigs[sig].call_uses_defs_clobbers::(); + let clobbers = sigs[sig].call_clobbers::(); Ok(Caller { sig, - uses, - defs, + uses: smallvec![], + defs: smallvec![], clobbers, dest: CallDest::ExtName(extname.clone(), dist), opcode: ir::Opcode::Call, @@ -1915,11 +1969,11 @@ impl Caller { flags: settings::Flags, ) -> CodegenResult> { let sig = sigs.abi_sig_for_signature(sig); - let (uses, defs, clobbers) = sigs[sig].call_uses_defs_clobbers::(); + let clobbers = sigs[sig].call_clobbers::(); Ok(Caller { sig, - uses, - defs, + uses: smallvec![], + defs: smallvec![], clobbers, dest: CallDest::ExtName(extname.clone(), dist), opcode: ir::Opcode::Call, @@ -1940,11 +1994,11 @@ impl Caller { flags: settings::Flags, ) -> CodegenResult> { let sig = sigs.abi_sig_for_sig_ref(sig_ref); - let (uses, defs, clobbers) = sigs[sig].call_uses_defs_clobbers::(); + let clobbers = sigs[sig].call_clobbers::(); Ok(Caller { sig, - uses, - defs, + uses: smallvec![], + defs: smallvec![], clobbers, dest: CallDest::Reg(ptr), opcode, @@ -2018,9 +2072,17 @@ impl Caller { // arg regs. let memcpy_call_conv = isa::CallConv::for_libcall(&self.flags, ctx.sigs()[self.sig].call_conv); - for insn in - M::gen_memcpy(memcpy_call_conv, dst_ptr.to_reg(), src_ptr, size as usize) - .into_iter() + let tmp1 = ctx.alloc_tmp(M::word_type()).only_reg().unwrap(); + let tmp2 = ctx.alloc_tmp(M::word_type()).only_reg().unwrap(); + for insn in M::gen_memcpy( + memcpy_call_conv, + dst_ptr.to_reg(), + src_ptr, + tmp1, + tmp2, + size as usize, + ) + .into_iter() { ctx.emit(insn); } @@ -2029,19 +2091,48 @@ impl Caller { } } - /// Generate a copy of an argument value from a source register, prior to - /// the call. For large arguments with associated stack buffer, this may - /// load the address of the buffer into the argument register, if required - /// by the ABI. 
- pub fn gen_copy_regs_to_arg( - &self, - ctx: &Lower, + /// Add a constraint for an argument value from a source register. + /// For large arguments with associated stack buffer, this may + /// load the address of the buffer into the argument register, if + /// required by the ABI. + pub fn gen_arg( + &mut self, + ctx: &mut Lower, idx: usize, from_regs: ValueRegs, ) -> SmallInstVec { let mut insts = smallvec![]; let word_rc = M::word_reg_class(); let word_bits = M::word_bits() as usize; + + // How many temps do we need for extends? Allocate them ahead + // of time, since we can't do it while we're iterating over + // the sig and immutably borrowing `ctx`. + let needed_tmps = match &ctx.sigs()[self.sig].args[idx] { + &ABIArg::Slots { ref slots, .. } => slots + .iter() + .map(|slot| match slot { + &ABIArgSlot::Reg { extension, .. } + if extension != ir::ArgumentExtension::None => + { + 1 + } + &ABIArgSlot::Reg { ty, .. } if ty.is_ref() => 1, + &ABIArgSlot::Reg { .. } => 0, + &ABIArgSlot::Stack { extension, .. } + if extension != ir::ArgumentExtension::None => + { + 1 + } + &ABIArgSlot::Stack { .. } => 0, + }) + .sum(), + _ => 0, + }; + let mut temps: SmallVec<[Writable; 16]> = (0..needed_tmps) + .map(|_| ctx.alloc_tmp(M::word_type()).only_reg().unwrap()) + .collect(); + match &ctx.sigs()[self.sig].args[idx] { &ABIArg::Slots { ref slots, .. } => { assert_eq!(from_regs.len(), slots.len()); @@ -2058,19 +2149,36 @@ impl Caller { ir::ArgumentExtension::Sext => true, _ => unreachable!(), }; + let extend_result = + temps.pop().expect("Must have allocated enough temps"); insts.push(M::gen_extend( - Writable::from_reg(Reg::from(reg)), + extend_result, *from_reg, signed, ty_bits(ty) as u8, word_bits as u8, )); + self.uses.push(CallArgPair { + vreg: extend_result.to_reg(), + preg: reg.into(), + }); + } else if ty.is_ref() { + // Reference-typed args need to be + // passed as a copy; the original vreg + // is constrained to the stack and + // this copy is in a reg. 
+ let ref_copy = + temps.pop().expect("Must have allocated enough temps"); + insts.push(M::gen_move(ref_copy, *from_reg, M::word_type())); + self.uses.push(CallArgPair { + vreg: ref_copy.to_reg(), + preg: reg.into(), + }); } else { - insts.push(M::gen_move( - Writable::from_reg(Reg::from(reg)), - *from_reg, - ty, - )); + self.uses.push(CallArgPair { + vreg: *from_reg, + preg: reg.into(), + }); } } &ABIArgSlot::Stack { @@ -2079,31 +2187,32 @@ impl Caller { extension, .. } => { - let mut ty = ty; let ext = M::get_ext_mode(ctx.sigs()[self.sig].call_conv, extension); - if ext != ir::ArgumentExtension::None && ty_bits(ty) < word_bits { - assert_eq!(word_rc, from_reg.class()); - let signed = match ext { - ir::ArgumentExtension::Uext => false, - ir::ArgumentExtension::Sext => true, - _ => unreachable!(), + let (data, ty) = + if ext != ir::ArgumentExtension::None && ty_bits(ty) < word_bits { + assert_eq!(word_rc, from_reg.class()); + let signed = match ext { + ir::ArgumentExtension::Uext => false, + ir::ArgumentExtension::Sext => true, + _ => unreachable!(), + }; + let extend_result = + temps.pop().expect("Must have allocated enough temps"); + insts.push(M::gen_extend( + extend_result, + *from_reg, + signed, + ty_bits(ty) as u8, + word_bits as u8, + )); + // Store the extended version. + (extend_result.to_reg(), M::word_type()) + } else { + (*from_reg, ty) }; - // Extend in place in the source register. Our convention is to - // treat high bits as undefined for values in registers, so this - // is safe, even for an argument that is nominally read-only. - insts.push(M::gen_extend( - Writable::from_reg(*from_reg), - *from_reg, - signed, - ty_bits(ty) as u8, - word_bits as u8, - )); - // Store the extended version. - ty = M::word_type(); - } insts.push(M::gen_store_stack( StackAMode::SPOffset(offset, ty), - *from_reg, + data, ty, )); } @@ -2118,9 +2227,9 @@ impl Caller { insts } - /// Emit a copy a return value into a destination register, after the call returns. 
- pub fn gen_copy_retval_to_regs( - &self, + /// Define a return value after the call returns. + pub fn gen_retval( + &mut self, ctx: &Lower, idx: usize, into_regs: ValueRegs>, @@ -2133,8 +2242,11 @@ impl Caller { match slot { // Extension mode doesn't matter because we're copying out, not in, // and we ignore high bits in our own registers by convention. - &ABIArgSlot::Reg { reg, ty, .. } => { - insts.push(M::gen_move(*into_reg, Reg::from(reg), ty)); + &ABIArgSlot::Reg { reg, .. } => { + self.defs.push(CallRetPair { + vreg: *into_reg, + preg: reg.into(), + }); } &ABIArgSlot::Stack { offset, ty, .. } => { let ret_area_base = ctx.sigs()[self.sig].sized_stack_arg_space; @@ -2171,10 +2283,6 @@ impl Caller { /// This function should only be called once, as it is allowed to re-use /// parts of the `Caller` object in emitting instructions. pub fn emit_call(&mut self, ctx: &mut Lower) { - let (uses, defs) = ( - mem::replace(&mut self.uses, Default::default()), - mem::replace(&mut self.defs, Default::default()), - ); let word_type = M::word_type(); if let Some(i) = ctx.sigs()[self.sig].stack_ret_arg { let rd = ctx.alloc_tmp(word_type).only_reg().unwrap(); @@ -2184,10 +2292,16 @@ impl Caller { rd, I8, )); - for inst in self.gen_copy_regs_to_arg(ctx, i, ValueRegs::one(rd.to_reg())) { + for inst in self.gen_arg(ctx, i, ValueRegs::one(rd.to_reg())) { ctx.emit(inst); } } + + let (uses, defs) = ( + mem::replace(&mut self.uses, Default::default()), + mem::replace(&mut self.defs, Default::default()), + ); + let tmp = ctx.alloc_tmp(word_type).only_reg().unwrap(); for inst in M::gen_call( &self.dest, diff --git a/cranelift/codegen/src/machinst/isle.rs b/cranelift/codegen/src/machinst/isle.rs index feff699d4e..d15340d66d 100644 --- a/cranelift/codegen/src/machinst/isle.rs +++ b/cranelift/codegen/src/machinst/isle.rs @@ -1208,21 +1208,32 @@ macro_rules! 
isle_prelude_method_helpers { caller.emit_copy_regs_to_buffer(self.lower_ctx, i, *arg_regs); } for (i, arg_regs) in arg_regs.iter().enumerate() { - for inst in caller.gen_copy_regs_to_arg(self.lower_ctx, i, *arg_regs) { + for inst in caller.gen_arg(self.lower_ctx, i, *arg_regs) { self.lower_ctx.emit(inst); } } - caller.emit_call(self.lower_ctx); - + // Handle retvals prior to emitting call, so the + // constraints are on the call instruction; but buffer the + // instructions till after the call. let mut outputs = InstOutput::new(); + let mut retval_insts: crate::machinst::abi::SmallInstVec<_> = smallvec::smallvec![]; for i in 0..num_rets { let ret = self.lower_ctx.sigs()[abi].get_ret(i); let retval_regs = self.abi_arg_slot_regs(&ret).unwrap(); - for inst in caller.gen_copy_retval_to_regs(self.lower_ctx, i, retval_regs.clone()) { - self.lower_ctx.emit(inst); - } + retval_insts.extend( + caller + .gen_retval(self.lower_ctx, i, retval_regs.clone()) + .into_iter(), + ); outputs.push(valueregs::non_writable_value_regs(retval_regs)); } + + caller.emit_call(self.lower_ctx); + + for inst in retval_insts { + self.lower_ctx.emit(inst); + } + caller.emit_stack_post_adjust(self.lower_ctx); outputs diff --git a/cranelift/codegen/src/machinst/vcode.rs b/cranelift/codegen/src/machinst/vcode.rs index 1af40ccf6d..7f36389cd9 100644 --- a/cranelift/codegen/src/machinst/vcode.rs +++ b/cranelift/codegen/src/machinst/vcode.rs @@ -1027,7 +1027,6 @@ impl VCode { // Spill from register to spillslot. let to = to.as_stack().unwrap(); let from_rreg = RealReg::from(from); - debug_assert_eq!(from.class(), to.class()); let spill = self.abi.gen_spill(to, from_rreg); do_emit(&spill, &[], &mut disasm, &mut buffer, &mut state); } @@ -1035,7 +1034,6 @@ impl VCode { // Load from spillslot to register. 
let from = from.as_stack().unwrap(); let to_rreg = Writable::from_reg(RealReg::from(to)); - debug_assert_eq!(from.class(), to.class()); let reload = self.abi.gen_reload(to_rreg, from); do_emit(&reload, &[], &mut disasm, &mut buffer, &mut state); } diff --git a/cranelift/filetests/filetests/isa/aarch64/amodes.clif b/cranelift/filetests/filetests/isa/aarch64/amodes.clif index 8347f9eb0c..c28ec2eb49 100644 --- a/cranelift/filetests/filetests/isa/aarch64/amodes.clif +++ b/cranelift/filetests/filetests/isa/aarch64/amodes.clif @@ -69,9 +69,10 @@ block0(v0: i64, v1: i64, v2: i64): } ; block0: -; add x0, x0, x2 -; add x0, x0, x1 -; ldr w0, [x0, #48] +; mov x6, x0 +; add x6, x6, x2 +; add x6, x6, x1 +; ldr w0, [x6, #48] ; ret function %f10(i64, i64, i64) -> i32 { @@ -232,11 +233,11 @@ block0(v0: i64): } ; block0: -; mov x6, x0 -; ldp x7, x1, [x6] -; mov x11, x7 -; stp x11, x1, [x0] -; mov x0, x7 +; mov x8, x0 +; mov x6, x8 +; ldp x0, x1, [x6] +; mov x7, x8 +; stp x0, x1, [x7] ; ret function %i128_imm_offset(i64) -> i128 { @@ -247,11 +248,11 @@ block0(v0: i64): } ; block0: -; mov x6, x0 -; ldp x7, x1, [x6, #16] -; mov x11, x7 -; stp x11, x1, [x0, #16] -; mov x0, x7 +; mov x8, x0 +; mov x6, x8 +; ldp x0, x1, [x6, #16] +; mov x7, x8 +; stp x0, x1, [x7, #16] ; ret function %i128_imm_offset_large(i64) -> i128 { @@ -262,11 +263,11 @@ block0(v0: i64): } ; block0: -; mov x6, x0 -; ldp x7, x1, [x6, #504] -; mov x11, x7 -; stp x11, x1, [x0, #504] -; mov x0, x7 +; mov x8, x0 +; mov x6, x8 +; ldp x0, x1, [x6, #504] +; mov x7, x8 +; stp x0, x1, [x7, #504] ; ret function %i128_imm_offset_negative_large(i64) -> i128 { @@ -277,11 +278,11 @@ block0(v0: i64): } ; block0: -; mov x6, x0 -; ldp x7, x1, [x6, #-512] -; mov x11, x7 -; stp x11, x1, [x0, #-512] -; mov x0, x7 +; mov x8, x0 +; mov x6, x8 +; ldp x0, x1, [x6, #-512] +; mov x7, x8 +; stp x0, x1, [x7, #-512] ; ret function %i128_add_offset(i64) -> i128 { @@ -293,11 +294,11 @@ block0(v0: i64): } ; block0: -; mov x6, x0 -; ldp x7, x1, [x6, 
#32] -; mov x11, x7 -; stp x11, x1, [x0, #32] -; mov x0, x7 +; mov x8, x0 +; mov x6, x8 +; ldp x0, x1, [x6, #32] +; mov x7, x8 +; stp x0, x1, [x7, #32] ; ret function %i128_32bit_sextend_simple(i32) -> i128 { @@ -327,13 +328,13 @@ block0(v0: i64, v1: i32): } ; block0: -; mov x7, x0 +; mov x11, x0 +; mov x7, x11 ; add x7, x7, x1, SXTW -; ldp x9, x10, [x7, #24] -; add x0, x0, x1, SXTW -; mov x14, x9 +; ldp x0, x10, [x7, #24] +; mov x9, x11 +; add x9, x9, x1, SXTW ; mov x1, x10 -; stp x14, x1, [x0, #24] -; mov x0, x9 +; stp x0, x1, [x9, #24] ; ret diff --git a/cranelift/filetests/filetests/isa/aarch64/bti.clif b/cranelift/filetests/filetests/isa/aarch64/bti.clif index 4e7ea3075f..157a767fa3 100644 --- a/cranelift/filetests/filetests/isa/aarch64/bti.clif +++ b/cranelift/filetests/filetests/isa/aarch64/bti.clif @@ -109,3 +109,4 @@ block0(v0: i64): ; blr x4 ; ldp fp, lr, [sp], #16 ; ret + diff --git a/cranelift/filetests/filetests/isa/aarch64/call.clif b/cranelift/filetests/filetests/isa/aarch64/call.clif index 7aec05dd8e..64ea276ae3 100644 --- a/cranelift/filetests/filetests/isa/aarch64/call.clif +++ b/cranelift/filetests/filetests/isa/aarch64/call.clif @@ -79,7 +79,7 @@ block0(v0: i8): ; stp fp, lr, [sp, #-16]! 
; mov fp, sp ; block0: -; mov x15, x0 +; mov x8, x0 ; sub sp, sp, #16 ; virtual_sp_offset_adjust 16 ; movz x0, #42 @@ -90,9 +90,9 @@ block0(v0: i8): ; movz x5, #42 ; movz x6, #42 ; movz x7, #42 -; strb w15, [sp] -; ldr x14, 8 ; b 12 ; data TestCase(%g) + 0 -; blr x14 +; strb w8, [sp] +; ldr x8, 8 ; b 12 ; data TestCase(%g) + 0 +; blr x8 ; add sp, sp, #16 ; virtual_sp_offset_adjust -16 ; ldp fp, lr, [sp], #16 @@ -105,7 +105,7 @@ block0(v0: i8): } ; block0: -; mov x15, x0 +; mov x8, x0 ; mov x13, x1 ; movz x0, #42 ; movz x1, #42 @@ -115,7 +115,8 @@ block0(v0: i8): ; movz x5, #42 ; movz x6, #42 ; movz x7, #42 -; strb w15, [x13] +; mov x11, x8 +; strb w11, [x13] ; ret function %f8() { @@ -140,26 +141,26 @@ block0: ; mov fp, sp ; sub sp, sp, #48 ; block0: -; ldr x8, 8 ; b 12 ; data TestCase(%g0) + 0 -; blr x8 +; ldr x9, 8 ; b 12 ; data TestCase(%g0) + 0 +; blr x9 ; str q0, [sp, #32] ; ldr x9, 8 ; b 12 ; data TestCase(%g1) + 0 ; blr x9 ; str q0, [sp, #16] -; ldr x10, 8 ; b 12 ; data TestCase(%g1) + 0 -; blr x10 +; ldr x9, 8 ; b 12 ; data TestCase(%g1) + 0 +; blr x9 ; str q0, [sp] -; ldr x12, 8 ; b 12 ; data TestCase(%g2) + 0 -; blr x12 +; ldr x9, 8 ; b 12 ; data TestCase(%g2) + 0 +; blr x9 +; ldr x10, 8 ; b 12 ; data TestCase(%g3) + 0 ; ldr q0, [sp, #32] -; ldr x14, 8 ; b 12 ; data TestCase(%g3) + 0 -; blr x14 +; blr x10 +; ldr x11, 8 ; b 12 ; data TestCase(%g4) + 0 ; ldr q0, [sp, #16] -; ldr x0, 8 ; b 12 ; data TestCase(%g4) + 0 -; blr x0 +; blr x11 +; ldr x12, 8 ; b 12 ; data TestCase(%g4) + 0 ; ldr q0, [sp] -; ldr x2, 8 ; b 12 ; data TestCase(%g4) + 0 -; blr x2 +; blr x12 ; add sp, sp, #48 ; ldp fp, lr, [sp], #16 ; ret @@ -184,26 +185,26 @@ block0: ; mov fp, sp ; sub sp, sp, #48 ; block0: -; ldr x8, 8 ; b 12 ; data TestCase(%g0) + 0 -; blr x8 +; ldr x9, 8 ; b 12 ; data TestCase(%g0) + 0 +; blr x9 ; str q0, [sp, #32] ; ldr x9, 8 ; b 12 ; data TestCase(%g0) + 0 ; blr x9 ; str q0, [sp, #16] -; ldr x10, 8 ; b 12 ; data TestCase(%g0) + 0 -; blr x10 +; ldr x9, 8 ; b 12 ; 
data TestCase(%g0) + 0 +; blr x9 ; str q0, [sp] -; ldr x12, 8 ; b 12 ; data TestCase(%g1) + 0 -; blr x12 +; ldr x9, 8 ; b 12 ; data TestCase(%g1) + 0 +; blr x9 +; ldr x10, 8 ; b 12 ; data TestCase(%g2) + 0 ; ldr q0, [sp, #32] -; ldr x14, 8 ; b 12 ; data TestCase(%g2) + 0 -; blr x14 +; blr x10 +; ldr x11, 8 ; b 12 ; data TestCase(%g2) + 0 ; ldr q0, [sp, #16] -; ldr x0, 8 ; b 12 ; data TestCase(%g2) + 0 -; blr x0 +; blr x11 +; ldr x12, 8 ; b 12 ; data TestCase(%g2) + 0 ; ldr q0, [sp] -; ldr x2, 8 ; b 12 ; data TestCase(%g2) + 0 -; blr x2 +; blr x12 ; add sp, sp, #48 ; ldp fp, lr, [sp], #16 ; ret @@ -232,26 +233,26 @@ block0: ; mov fp, sp ; sub sp, sp, #48 ; block0: -; ldr x8, 8 ; b 12 ; data TestCase(%g0) + 0 -; blr x8 +; ldr x9, 8 ; b 12 ; data TestCase(%g0) + 0 +; blr x9 ; str q0, [sp, #32] ; ldr x9, 8 ; b 12 ; data TestCase(%g1) + 0 ; blr x9 ; str q0, [sp, #16] -; ldr x10, 8 ; b 12 ; data TestCase(%g2) + 0 -; blr x10 +; ldr x9, 8 ; b 12 ; data TestCase(%g2) + 0 +; blr x9 ; str q0, [sp] -; ldr x12, 8 ; b 12 ; data TestCase(%g3) + 0 -; blr x12 +; ldr x9, 8 ; b 12 ; data TestCase(%g3) + 0 +; blr x9 +; ldr x10, 8 ; b 12 ; data TestCase(%g4) + 0 ; ldr q0, [sp, #32] -; ldr x14, 8 ; b 12 ; data TestCase(%g4) + 0 -; blr x14 +; blr x10 +; ldr x11, 8 ; b 12 ; data TestCase(%g5) + 0 ; ldr q0, [sp, #16] -; ldr x0, 8 ; b 12 ; data TestCase(%g5) + 0 -; blr x0 +; blr x11 +; ldr x12, 8 ; b 12 ; data TestCase(%g6) + 0 ; ldr q0, [sp] -; ldr x2, 8 ; b 12 ; data TestCase(%g6) + 0 -; blr x2 +; blr x12 ; add sp, sp, #48 ; ldp fp, lr, [sp], #16 ; ret @@ -279,12 +280,11 @@ block0(v0: i64): ; stp fp, lr, [sp, #-16]! ; mov fp, sp ; block0: -; mov x7, x0 +; mov x1, x0 ; movz x0, #42 ; movz x2, #42 -; mov x1, x7 -; ldr x9, 8 ; b 12 ; data TestCase(%f11) + 0 -; blr x9 +; ldr x7, 8 ; b 12 ; data TestCase(%f11) + 0 +; blr x7 ; ldp fp, lr, [sp], #16 ; ret @@ -311,12 +311,11 @@ block0(v0: i64): ; stp fp, lr, [sp, #-16]! 
; mov fp, sp ; block0: -; mov x7, x0 +; mov x2, x0 ; movz x3, #42 ; movz x0, #42 -; mov x2, x7 -; ldr x9, 8 ; b 12 ; data TestCase(%f12) + 0 -; blr x9 +; ldr x7, 8 ; b 12 ; data TestCase(%f12) + 0 +; blr x7 ; ldp fp, lr, [sp], #16 ; ret @@ -343,12 +342,11 @@ block0(v0: i64): ; stp fp, lr, [sp, #-16]! ; mov fp, sp ; block0: -; mov x7, x0 +; mov x1, x0 ; movz x2, #42 ; movz x0, #42 -; mov x1, x7 -; ldr x9, 8 ; b 12 ; data TestCase(%f13) + 0 -; blr x9 +; ldr x7, 8 ; b 12 ; data TestCase(%f13) + 0 +; blr x7 ; ldp fp, lr, [sp], #16 ; ret @@ -376,20 +374,19 @@ block0(v0: i128, v1: i64): ; stp fp, lr, [sp, #-16]! ; mov fp, sp ; block0: -; mov x11, x2 +; mov x6, x2 ; sub sp, sp, #16 ; virtual_sp_offset_adjust 16 -; mov x10, x0 -; mov x12, x1 -; mov x2, x10 -; mov x3, x12 -; mov x4, x10 -; mov x5, x12 -; mov x6, x11 -; str x10, [sp] -; str x12, [sp, #8] -; ldr x7, 8 ; b 12 ; data TestCase(%f14) + 0 -; blr x7 +; str x0, [sp] +; mov x4, x0 +; str x1, [sp, #8] +; mov x5, x1 +; ldr x12, 8 ; b 12 ; data TestCase(%f14) + 0 +; mov x0, x4 +; mov x2, x4 +; mov x1, x5 +; mov x3, x5 +; blr x12 ; add sp, sp, #16 ; virtual_sp_offset_adjust -16 ; ldp fp, lr, [sp], #16 @@ -419,20 +416,19 @@ block0(v0: i128, v1: i64): ; stp fp, lr, [sp, #-16]! ; mov fp, sp ; block0: -; mov x11, x2 +; mov x6, x2 ; sub sp, sp, #16 ; virtual_sp_offset_adjust 16 -; mov x10, x0 -; mov x12, x1 -; mov x2, x10 -; mov x3, x12 -; mov x4, x10 -; mov x5, x12 -; mov x6, x11 -; str x10, [sp] -; str x12, [sp, #8] -; ldr x7, 8 ; b 12 ; data TestCase(%f15) + 0 -; blr x7 +; str x0, [sp] +; mov x4, x0 +; str x1, [sp, #8] +; mov x5, x1 +; ldr x12, 8 ; b 12 ; data TestCase(%f15) + 0 +; mov x0, x4 +; mov x2, x4 +; mov x1, x5 +; mov x3, x5 +; blr x12 ; add sp, sp, #16 ; virtual_sp_offset_adjust -16 ; ldp fp, lr, [sp], #16 @@ -496,8 +492,8 @@ block0(v0: i64): ; str x24, [sp, #-16]! 
; block0: ; mov x24, x8 -; ldr x5, 8 ; b 12 ; data TestCase(%g) + 0 -; blr x5 +; ldr x4, 8 ; b 12 ; data TestCase(%g) + 0 +; blr x4 ; mov x8, x24 ; ldr x24, [sp], #16 ; ldp fp, lr, [sp], #16 diff --git a/cranelift/filetests/filetests/isa/aarch64/floating-point.clif b/cranelift/filetests/filetests/isa/aarch64/floating-point.clif index 2ffd58c16e..c1e6b228cc 100644 --- a/cranelift/filetests/filetests/isa/aarch64/floating-point.clif +++ b/cranelift/filetests/filetests/isa/aarch64/floating-point.clif @@ -918,8 +918,9 @@ block0(v0: f32x4, v1: f32x4, v2: f32x4): } ; block0: -; fmla v2.4s, v2.4s, v0.4s, v1.4s -; mov v0.16b, v2.16b +; mov v5.16b, v2.16b +; fmla v5.4s, v5.4s, v0.4s, v1.4s +; mov v0.16b, v5.16b ; ret function %f79(f32x2, f32x2, f32x2) -> f32x2 { @@ -929,8 +930,9 @@ block0(v0: f32x2, v1: f32x2, v2: f32x2): } ; block0: -; fmla v2.2s, v2.2s, v0.2s, v1.2s -; mov v0.16b, v2.16b +; mov v5.16b, v2.16b +; fmla v5.2s, v5.2s, v0.2s, v1.2s +; mov v0.16b, v5.16b ; ret function %f80(f64x2, f64x2, f64x2) -> f64x2 { @@ -940,8 +942,9 @@ block0(v0: f64x2, v1: f64x2, v2: f64x2): } ; block0: -; fmla v2.2d, v2.2d, v0.2d, v1.2d -; mov v0.16b, v2.16b +; mov v5.16b, v2.16b +; fmla v5.2d, v5.2d, v0.2d, v1.2d +; mov v0.16b, v5.16b ; ret function %f81(f32x2, f32x2) -> f32x2 { diff --git a/cranelift/filetests/filetests/isa/aarch64/reftypes.clif b/cranelift/filetests/filetests/isa/aarch64/reftypes.clif index 5a514f587d..49520a0cf6 100644 --- a/cranelift/filetests/filetests/isa/aarch64/reftypes.clif +++ b/cranelift/filetests/filetests/isa/aarch64/reftypes.clif @@ -67,15 +67,15 @@ block3(v7: r64, v8: r64): ; mov fp, sp ; sub sp, sp, #32 ; block0: -; str x1, [sp, #16] ; str x0, [sp, #8] -; ldr x1, 8 ; b 12 ; data TestCase(%f) + 0 -; blr x1 -; mov x3, sp +; str x1, [sp, #16] +; ldr x3, 8 ; b 12 ; data TestCase(%f) + 0 +; blr x3 +; mov x2, sp ; ldr x9, [sp, #8] -; str x9, [x3] -; and w4, w0, #1 -; cbz x4, label1 ; b label3 +; str x9, [x2] +; and w3, w0, #1 +; cbz x3, label1 ; b label3 ; 
block1: ; b label2 ; block2: @@ -89,8 +89,8 @@ block3(v7: r64, v8: r64): ; ldr x1, [sp, #16] ; b label5 ; block5: -; mov x5, sp -; ldr x2, [x5] +; mov x4, sp +; ldr x2, [x4] ; add sp, sp, #32 ; ldp fp, lr, [sp], #16 ; ret diff --git a/cranelift/filetests/filetests/isa/aarch64/simd-narrow.clif b/cranelift/filetests/filetests/isa/aarch64/simd-narrow.clif index 50b147adff..b5940c2d64 100644 --- a/cranelift/filetests/filetests/isa/aarch64/simd-narrow.clif +++ b/cranelift/filetests/filetests/isa/aarch64/simd-narrow.clif @@ -9,8 +9,9 @@ block0(v0: i16x4, v1: i16x4): } ; block0: -; mov v0.d[1], v0.d[1], v1.d[0] -; sqxtn v0.8b, v0.8h +; mov v4.16b, v0.16b +; mov v4.d[1], v4.d[1], v1.d[0] +; sqxtn v0.8b, v4.8h ; ret function %snarrow_i16x8(i16x8, i16x8) -> i8x16 { @@ -31,8 +32,9 @@ block0(v0: i32x2, v1: i32x2): } ; block0: -; mov v0.d[1], v0.d[1], v1.d[0] -; sqxtn v0.4h, v0.4s +; mov v4.16b, v0.16b +; mov v4.d[1], v4.d[1], v1.d[0] +; sqxtn v0.4h, v4.4s ; ret function %snarrow_i32x4(i32x4, i32x4) -> i16x8 { @@ -64,8 +66,9 @@ block0(v0: i16x4, v1: i16x4): } ; block0: -; mov v0.d[1], v0.d[1], v1.d[0] -; sqxtun v0.8b, v0.8h +; mov v4.16b, v0.16b +; mov v4.d[1], v4.d[1], v1.d[0] +; sqxtun v0.8b, v4.8h ; ret function %unarrow_i16x8(i16x8, i16x8) -> i8x16 { @@ -86,8 +89,9 @@ block0(v0: i32x2, v1: i32x2): } ; block0: -; mov v0.d[1], v0.d[1], v1.d[0] -; sqxtun v0.4h, v0.4s +; mov v4.16b, v0.16b +; mov v4.d[1], v4.d[1], v1.d[0] +; sqxtun v0.4h, v4.4s ; ret function %unarrow_i32x4(i32x4, i32x4) -> i16x8 { @@ -119,8 +123,9 @@ block0(v0: i16x4, v1: i16x4): } ; block0: -; mov v0.d[1], v0.d[1], v1.d[0] -; uqxtn v0.8b, v0.8h +; mov v4.16b, v0.16b +; mov v4.d[1], v4.d[1], v1.d[0] +; uqxtn v0.8b, v4.8h ; ret function %uunarrow_i16x8(i16x8, i16x8) -> i8x16 { @@ -141,8 +146,9 @@ block0(v0: i32x2, v1: i32x2): } ; block0: -; mov v0.d[1], v0.d[1], v1.d[0] -; uqxtn v0.4h, v0.4s +; mov v4.16b, v0.16b +; mov v4.d[1], v4.d[1], v1.d[0] +; uqxtn v0.4h, v4.4s ; ret function %uunarrow_i32x4(i32x4, i32x4) 
-> i16x8 { diff --git a/cranelift/filetests/filetests/isa/aarch64/tls-elf-gd.clif b/cranelift/filetests/filetests/isa/aarch64/tls-elf-gd.clif index 016a624507..d31db4da88 100644 --- a/cranelift/filetests/filetests/isa/aarch64/tls-elf-gd.clif +++ b/cranelift/filetests/filetests/isa/aarch64/tls-elf-gd.clif @@ -20,8 +20,9 @@ block0(v0: i32): ; block0: ; mov x25, x0 ; elf_tls_get_addr x0, userextname0 +; mov x7, x25 ; mov x1, x0 -; mov x0, x25 +; mov x0, x7 ; ldp d8, d9, [sp], #16 ; ldp d10, d11, [sp], #16 ; ldp d12, d13, [sp], #16 diff --git a/cranelift/filetests/filetests/isa/s390x/atomic_cas-little.clif b/cranelift/filetests/filetests/isa/s390x/atomic_cas-little.clif index 0e095ac4e4..1bdeea3ac1 100644 --- a/cranelift/filetests/filetests/isa/s390x/atomic_cas-little.clif +++ b/cranelift/filetests/filetests/isa/s390x/atomic_cas-little.clif @@ -41,12 +41,13 @@ block0(v0: i64, v1: i16, v2: i16, v3: i64): ; block0: ; lgr %r9, %r4 ; sllk %r4, %r5, 3 -; nill %r5, 65532 -; lrvr %r2, %r3 +; lgr %r2, %r5 +; nill %r2, 65532 +; lrvr %r5, %r3 ; lgr %r3, %r9 ; lrvr %r3, %r3 -; l %r0, 0(%r5) -; 0: rll %r1, %r0, 16(%r4) ; rxsbg %r1, %r2, 176, 64, 48 ; jglh 1f ; risbgn %r1, %r3, 48, 64, 48 ; rll %r1, %r1, 16(%r4) ; cs %r0, %r1, 0(%r5) ; jglh 0b ; 1: +; l %r0, 0(%r2) +; 0: rll %r1, %r0, 16(%r4) ; rxsbg %r1, %r5, 176, 64, 48 ; jglh 1f ; risbgn %r1, %r3, 48, 64, 48 ; rll %r1, %r1, 16(%r4) ; cs %r0, %r1, 0(%r2) ; jglh 0b ; 1: ; rll %r2, %r0, 0(%r4) ; lrvr %r2, %r2 ; lmg %r9, %r15, 72(%r15) @@ -62,10 +63,11 @@ block0(v0: i64, v1: i8, v2: i8, v3: i64): ; block0: ; lgr %r11, %r4 ; sllk %r4, %r5, 3 -; nill %r5, 65532 -; lcr %r2, %r4 -; l %r0, 0(%r5) -; 0: rll %r1, %r0, 0(%r4) ; rxsbg %r1, %r3, 160, 40, 24 ; jglh 1f ; risbgn %r1, %r11, 32, 40, 24 ; rll %r1, %r1, 0(%r2) ; cs %r0, %r1, 0(%r5) ; jglh 0b ; 1: +; lgr %r2, %r5 +; nill %r2, 65532 +; lcr %r5, %r4 +; l %r0, 0(%r2) +; 0: rll %r1, %r0, 0(%r4) ; rxsbg %r1, %r3, 160, 40, 24 ; jglh 1f ; risbgn %r1, %r11, 32, 40, 24 ; rll %r1, %r1, 0(%r5) 
; cs %r0, %r1, 0(%r2) ; jglh 0b ; 1: ; rll %r2, %r0, 8(%r4) ; lmg %r11, %r15, 88(%r15) ; br %r14 diff --git a/cranelift/filetests/filetests/isa/s390x/atomic_cas.clif b/cranelift/filetests/filetests/isa/s390x/atomic_cas.clif index 05e9650c65..db516f8bb4 100644 --- a/cranelift/filetests/filetests/isa/s390x/atomic_cas.clif +++ b/cranelift/filetests/filetests/isa/s390x/atomic_cas.clif @@ -31,13 +31,17 @@ block0(v0: i64, v1: i16, v2: i16, v3: i64): return v4 } +; stmg %r6, %r15, 48(%r15) ; block0: -; lgr %r2, %r4 +; lgr %r6, %r4 ; sllk %r4, %r5, 3 -; nill %r5, 65532 -; l %r0, 0(%r5) -; 0: rll %r1, %r0, 0(%r4) ; rxsbg %r1, %r3, 160, 48, 16 ; jglh 1f ; risbgn %r1, %r2, 32, 48, 16 ; rll %r1, %r1, 0(%r4) ; cs %r0, %r1, 0(%r5) ; jglh 0b ; 1: +; lgr %r2, %r5 +; nill %r2, 65532 +; l %r0, 0(%r2) +; lgr %r5, %r6 +; 0: rll %r1, %r0, 0(%r4) ; rxsbg %r1, %r3, 160, 48, 16 ; jglh 1f ; risbgn %r1, %r5, 32, 48, 16 ; rll %r1, %r1, 0(%r4) ; cs %r0, %r1, 0(%r2) ; jglh 0b ; 1: ; rll %r2, %r0, 16(%r4) +; lmg %r6, %r15, 48(%r15) ; br %r14 function %atomic_cas_i8(i64, i8, i8, i64) -> i8 { @@ -50,10 +54,11 @@ block0(v0: i64, v1: i8, v2: i8, v3: i64): ; block0: ; lgr %r11, %r4 ; sllk %r4, %r5, 3 -; nill %r5, 65532 -; lcr %r2, %r4 -; l %r0, 0(%r5) -; 0: rll %r1, %r0, 0(%r4) ; rxsbg %r1, %r3, 160, 40, 24 ; jglh 1f ; risbgn %r1, %r11, 32, 40, 24 ; rll %r1, %r1, 0(%r2) ; cs %r0, %r1, 0(%r5) ; jglh 0b ; 1: +; lgr %r2, %r5 +; nill %r2, 65532 +; lcr %r5, %r4 +; l %r0, 0(%r2) +; 0: rll %r1, %r0, 0(%r4) ; rxsbg %r1, %r3, 160, 40, 24 ; jglh 1f ; risbgn %r1, %r11, 32, 40, 24 ; rll %r1, %r1, 0(%r5) ; cs %r0, %r1, 0(%r2) ; jglh 0b ; 1: ; rll %r2, %r0, 8(%r4) ; lmg %r11, %r15, 88(%r15) ; br %r14 diff --git a/cranelift/filetests/filetests/isa/s390x/atomic_rmw-arch13.clif b/cranelift/filetests/filetests/isa/s390x/atomic_rmw-arch13.clif index 498df64a16..3e80200734 100644 --- a/cranelift/filetests/filetests/isa/s390x/atomic_rmw-arch13.clif +++ b/cranelift/filetests/filetests/isa/s390x/atomic_rmw-arch13.clif @@ 
-32,12 +32,12 @@ block0(v0: i64, v1: i64, v2: i16): } ; block0: -; lgr %r5, %r4 -; sllk %r4, %r3, 3 -; nill %r3, 65532 -; l %r0, 0(%r3) -; 0: rll %r1, %r0, 0(%r4) ; rnsbg %r1, %r5, 32, 48, 16 ; xilf %r1, 4294901760 ; rll %r1, %r1, 0(%r4) ; cs %r0, %r1, 0(%r3) ; jglh 0b ; 1: -; rll %r2, %r0, 16(%r4) +; sllk %r2, %r3, 3 +; lgr %r5, %r3 +; nill %r5, 65532 +; l %r0, 0(%r5) +; 0: rll %r1, %r0, 0(%r2) ; rnsbg %r1, %r4, 32, 48, 16 ; xilf %r1, 4294901760 ; rll %r1, %r1, 0(%r2) ; cs %r0, %r1, 0(%r5) ; jglh 0b ; 1: +; rll %r2, %r0, 16(%r2) ; br %r14 function %atomic_rmw_nand_i8(i64, i64, i8) -> i8 { @@ -46,14 +46,16 @@ block0(v0: i64, v1: i64, v2: i8): return v3 } +; stmg %r11, %r15, 88(%r15) ; block0: -; lgr %r2, %r4 -; sllk %r4, %r3, 3 -; nill %r3, 65532 -; lcr %r5, %r4 -; l %r0, 0(%r3) -; 0: rll %r1, %r0, 0(%r4) ; rnsbg %r1, %r2, 32, 40, 24 ; xilf %r1, 4278190080 ; rll %r1, %r1, 0(%r5) ; cs %r0, %r1, 0(%r3) ; jglh 0b ; 1: -; rll %r2, %r0, 8(%r4) +; sllk %r11, %r3, 3 +; lgr %r5, %r3 +; nill %r5, 65532 +; lcr %r2, %r11 +; l %r0, 0(%r5) +; 0: rll %r1, %r0, 0(%r11) ; rnsbg %r1, %r4, 32, 40, 24 ; xilf %r1, 4278190080 ; rll %r1, %r1, 0(%r2) ; cs %r0, %r1, 0(%r5) ; jglh 0b ; 1: +; rll %r2, %r0, 8(%r11) +; lmg %r11, %r15, 88(%r15) ; br %r14 function %atomic_rmw_nand_i64(i64, i64, i64) -> i64 { @@ -89,13 +91,13 @@ block0(v0: i64, v1: i64, v2: i16): } ; block0: -; lgr %r2, %r4 -; sllk %r4, %r3, 3 -; nill %r3, 65532 -; lrvr %r5, %r2 -; l %r0, 0(%r3) -; 0: rll %r1, %r0, 16(%r4) ; rnsbg %r1, %r5, 48, 64, 48 ; xilf %r1, 65535 ; rll %r1, %r1, 16(%r4) ; cs %r0, %r1, 0(%r3) ; jglh 0b ; 1: -; rll %r3, %r0, 0(%r4) +; sllk %r2, %r3, 3 +; lgr %r5, %r3 +; nill %r5, 65532 +; lrvr %r4, %r4 +; l %r0, 0(%r5) +; 0: rll %r1, %r0, 16(%r2) ; rnsbg %r1, %r4, 48, 64, 48 ; xilf %r1, 65535 ; rll %r1, %r1, 16(%r2) ; cs %r0, %r1, 0(%r5) ; jglh 0b ; 1: +; rll %r3, %r0, 0(%r2) ; lrvr %r2, %r3 ; br %r14 @@ -105,13 +107,15 @@ block0(v0: i64, v1: i64, v2: i8): return v3 } +; stmg %r11, %r15, 88(%r15) ; block0: 
-; lgr %r2, %r4 -; sllk %r4, %r3, 3 -; nill %r3, 65532 -; lcr %r5, %r4 -; l %r0, 0(%r3) -; 0: rll %r1, %r0, 0(%r4) ; rnsbg %r1, %r2, 32, 40, 24 ; xilf %r1, 4278190080 ; rll %r1, %r1, 0(%r5) ; cs %r0, %r1, 0(%r3) ; jglh 0b ; 1: -; rll %r2, %r0, 8(%r4) +; sllk %r11, %r3, 3 +; lgr %r5, %r3 +; nill %r5, 65532 +; lcr %r2, %r11 +; l %r0, 0(%r5) +; 0: rll %r1, %r0, 0(%r11) ; rnsbg %r1, %r4, 32, 40, 24 ; xilf %r1, 4278190080 ; rll %r1, %r1, 0(%r2) ; cs %r0, %r1, 0(%r5) ; jglh 0b ; 1: +; rll %r2, %r0, 8(%r11) +; lmg %r11, %r15, 88(%r15) ; br %r14 diff --git a/cranelift/filetests/filetests/isa/s390x/atomic_rmw-little.clif b/cranelift/filetests/filetests/isa/s390x/atomic_rmw-little.clif index b7ca9939a0..64eb171ba5 100644 --- a/cranelift/filetests/filetests/isa/s390x/atomic_rmw-little.clif +++ b/cranelift/filetests/filetests/isa/s390x/atomic_rmw-little.clif @@ -38,13 +38,13 @@ block0(v0: i64, v1: i64, v2: i16): } ; block0: -; lgr %r2, %r4 -; sllk %r4, %r3, 3 -; nill %r3, 65532 -; lrvr %r5, %r2 -; l %r0, 0(%r3) -; 0: rll %r1, %r0, 16(%r4) ; risbgn %r1, %r5, 48, 64, 48 ; rll %r1, %r1, 16(%r4) ; cs %r0, %r1, 0(%r3) ; jglh 0b ; 1: -; rll %r3, %r0, 0(%r4) +; sllk %r2, %r3, 3 +; lgr %r5, %r3 +; nill %r5, 65532 +; lrvr %r4, %r4 +; l %r0, 0(%r5) +; 0: rll %r1, %r0, 16(%r2) ; risbgn %r1, %r4, 48, 64, 48 ; rll %r1, %r1, 16(%r2) ; cs %r0, %r1, 0(%r5) ; jglh 0b ; 1: +; rll %r3, %r0, 0(%r2) ; lrvr %r2, %r3 ; br %r14 @@ -54,14 +54,16 @@ block0(v0: i64, v1: i64, v2: i8): return v3 } +; stmg %r11, %r15, 88(%r15) ; block0: -; lgr %r2, %r4 -; sllk %r4, %r3, 3 -; nill %r3, 65532 -; lcr %r5, %r4 -; l %r0, 0(%r3) -; 0: rll %r1, %r0, 0(%r4) ; risbgn %r1, %r2, 32, 40, 24 ; rll %r1, %r1, 0(%r5) ; cs %r0, %r1, 0(%r3) ; jglh 0b ; 1: -; rll %r2, %r0, 8(%r4) +; sllk %r11, %r3, 3 +; lgr %r5, %r3 +; nill %r5, 65532 +; lcr %r2, %r11 +; l %r0, 0(%r5) +; 0: rll %r1, %r0, 0(%r11) ; risbgn %r1, %r4, 32, 40, 24 ; rll %r1, %r1, 0(%r2) ; cs %r0, %r1, 0(%r5) ; jglh 0b ; 1: +; rll %r2, %r0, 8(%r11) +; lmg %r11, 
%r15, 88(%r15) ; br %r14 function %atomic_rmw_add_i64(i64, i64, i64) -> i64 { @@ -95,13 +97,13 @@ block0(v0: i64, v1: i64, v2: i16): } ; block0: -; lgr %r2, %r4 -; sllk %r4, %r3, 3 -; nill %r3, 65532 -; sllk %r5, %r2, 16 -; l %r0, 0(%r3) -; 0: rll %r1, %r0, 16(%r4) ; lrvr %r1, %r1 ; ar %r1, %r5 ; lrvr %r1, %r1 ; rll %r1, %r1, 16(%r4) ; cs %r0, %r1, 0(%r3) ; jglh 0b ; 1: -; rll %r3, %r0, 0(%r4) +; sllk %r2, %r3, 3 +; lgr %r5, %r3 +; nill %r5, 65532 +; sllk %r4, %r4, 16 +; l %r0, 0(%r5) +; 0: rll %r1, %r0, 16(%r2) ; lrvr %r1, %r1 ; ar %r1, %r4 ; lrvr %r1, %r1 ; rll %r1, %r1, 16(%r2) ; cs %r0, %r1, 0(%r5) ; jglh 0b ; 1: +; rll %r3, %r0, 0(%r2) ; lrvr %r2, %r3 ; br %r14 @@ -111,14 +113,17 @@ block0(v0: i64, v1: i64, v2: i8): return v3 } +; stmg %r11, %r15, 88(%r15) ; block0: -; sllk %r2, %r3, 3 -; nill %r3, 65532 -; sllk %r5, %r4, 24 -; lcr %r4, %r2 -; l %r0, 0(%r3) -; 0: rll %r1, %r0, 0(%r2) ; ar %r1, %r5 ; rll %r1, %r1, 0(%r4) ; cs %r0, %r1, 0(%r3) ; jglh 0b ; 1: -; rll %r2, %r0, 8(%r2) +; sllk %r11, %r3, 3 +; lgr %r5, %r3 +; nill %r5, 65532 +; sllk %r4, %r4, 24 +; lcr %r2, %r11 +; l %r0, 0(%r5) +; 0: rll %r1, %r0, 0(%r11) ; ar %r1, %r4 ; rll %r1, %r1, 0(%r2) ; cs %r0, %r1, 0(%r5) ; jglh 0b ; 1: +; rll %r2, %r0, 8(%r11) +; lmg %r11, %r15, 88(%r15) ; br %r14 function %atomic_rmw_sub_i64(i64, i64, i64) -> i64 { @@ -152,13 +157,13 @@ block0(v0: i64, v1: i64, v2: i16): } ; block0: -; lgr %r2, %r4 -; sllk %r4, %r3, 3 -; nill %r3, 65532 -; sllk %r5, %r2, 16 -; l %r0, 0(%r3) -; 0: rll %r1, %r0, 16(%r4) ; lrvr %r1, %r1 ; sr %r1, %r5 ; lrvr %r1, %r1 ; rll %r1, %r1, 16(%r4) ; cs %r0, %r1, 0(%r3) ; jglh 0b ; 1: -; rll %r3, %r0, 0(%r4) +; sllk %r2, %r3, 3 +; lgr %r5, %r3 +; nill %r5, 65532 +; sllk %r4, %r4, 16 +; l %r0, 0(%r5) +; 0: rll %r1, %r0, 16(%r2) ; lrvr %r1, %r1 ; sr %r1, %r4 ; lrvr %r1, %r1 ; rll %r1, %r1, 16(%r2) ; cs %r0, %r1, 0(%r5) ; jglh 0b ; 1: +; rll %r3, %r0, 0(%r2) ; lrvr %r2, %r3 ; br %r14 @@ -168,14 +173,17 @@ block0(v0: i64, v1: i64, v2: i8): return v3 } +; 
stmg %r11, %r15, 88(%r15) ; block0: -; sllk %r2, %r3, 3 -; nill %r3, 65532 -; sllk %r5, %r4, 24 -; lcr %r4, %r2 -; l %r0, 0(%r3) -; 0: rll %r1, %r0, 0(%r2) ; sr %r1, %r5 ; rll %r1, %r1, 0(%r4) ; cs %r0, %r1, 0(%r3) ; jglh 0b ; 1: -; rll %r2, %r0, 8(%r2) +; sllk %r11, %r3, 3 +; lgr %r5, %r3 +; nill %r5, 65532 +; sllk %r4, %r4, 24 +; lcr %r2, %r11 +; l %r0, 0(%r5) +; 0: rll %r1, %r0, 0(%r11) ; sr %r1, %r4 ; rll %r1, %r1, 0(%r2) ; cs %r0, %r1, 0(%r5) ; jglh 0b ; 1: +; rll %r2, %r0, 8(%r11) +; lmg %r11, %r15, 88(%r15) ; br %r14 function %atomic_rmw_and_i64(i64, i64, i64) -> i64 { @@ -209,13 +217,13 @@ block0(v0: i64, v1: i64, v2: i16): } ; block0: -; lgr %r2, %r4 -; sllk %r4, %r3, 3 -; nill %r3, 65532 -; lrvr %r5, %r2 -; l %r0, 0(%r3) -; 0: rll %r1, %r0, 16(%r4) ; rnsbg %r1, %r5, 48, 64, 48 ; rll %r1, %r1, 16(%r4) ; cs %r0, %r1, 0(%r3) ; jglh 0b ; 1: -; rll %r3, %r0, 0(%r4) +; sllk %r2, %r3, 3 +; lgr %r5, %r3 +; nill %r5, 65532 +; lrvr %r4, %r4 +; l %r0, 0(%r5) +; 0: rll %r1, %r0, 16(%r2) ; rnsbg %r1, %r4, 48, 64, 48 ; rll %r1, %r1, 16(%r2) ; cs %r0, %r1, 0(%r5) ; jglh 0b ; 1: +; rll %r3, %r0, 0(%r2) ; lrvr %r2, %r3 ; br %r14 @@ -225,14 +233,16 @@ block0(v0: i64, v1: i64, v2: i8): return v3 } +; stmg %r11, %r15, 88(%r15) ; block0: -; lgr %r2, %r4 -; sllk %r4, %r3, 3 -; nill %r3, 65532 -; lcr %r5, %r4 -; l %r0, 0(%r3) -; 0: rll %r1, %r0, 0(%r4) ; rnsbg %r1, %r2, 32, 40, 24 ; rll %r1, %r1, 0(%r5) ; cs %r0, %r1, 0(%r3) ; jglh 0b ; 1: -; rll %r2, %r0, 8(%r4) +; sllk %r11, %r3, 3 +; lgr %r5, %r3 +; nill %r5, 65532 +; lcr %r2, %r11 +; l %r0, 0(%r5) +; 0: rll %r1, %r0, 0(%r11) ; rnsbg %r1, %r4, 32, 40, 24 ; rll %r1, %r1, 0(%r2) ; cs %r0, %r1, 0(%r5) ; jglh 0b ; 1: +; rll %r2, %r0, 8(%r11) +; lmg %r11, %r15, 88(%r15) ; br %r14 function %atomic_rmw_or_i64(i64, i64, i64) -> i64 { @@ -266,13 +276,13 @@ block0(v0: i64, v1: i64, v2: i16): } ; block0: -; lgr %r2, %r4 -; sllk %r4, %r3, 3 -; nill %r3, 65532 -; lrvr %r5, %r2 -; l %r0, 0(%r3) -; 0: rll %r1, %r0, 16(%r4) ; rosbg %r1, 
%r5, 48, 64, 48 ; rll %r1, %r1, 16(%r4) ; cs %r0, %r1, 0(%r3) ; jglh 0b ; 1: -; rll %r3, %r0, 0(%r4) +; sllk %r2, %r3, 3 +; lgr %r5, %r3 +; nill %r5, 65532 +; lrvr %r4, %r4 +; l %r0, 0(%r5) +; 0: rll %r1, %r0, 16(%r2) ; rosbg %r1, %r4, 48, 64, 48 ; rll %r1, %r1, 16(%r2) ; cs %r0, %r1, 0(%r5) ; jglh 0b ; 1: +; rll %r3, %r0, 0(%r2) ; lrvr %r2, %r3 ; br %r14 @@ -282,14 +292,16 @@ block0(v0: i64, v1: i64, v2: i8): return v3 } +; stmg %r11, %r15, 88(%r15) ; block0: -; lgr %r2, %r4 -; sllk %r4, %r3, 3 -; nill %r3, 65532 -; lcr %r5, %r4 -; l %r0, 0(%r3) -; 0: rll %r1, %r0, 0(%r4) ; rosbg %r1, %r2, 32, 40, 24 ; rll %r1, %r1, 0(%r5) ; cs %r0, %r1, 0(%r3) ; jglh 0b ; 1: -; rll %r2, %r0, 8(%r4) +; sllk %r11, %r3, 3 +; lgr %r5, %r3 +; nill %r5, 65532 +; lcr %r2, %r11 +; l %r0, 0(%r5) +; 0: rll %r1, %r0, 0(%r11) ; rosbg %r1, %r4, 32, 40, 24 ; rll %r1, %r1, 0(%r2) ; cs %r0, %r1, 0(%r5) ; jglh 0b ; 1: +; rll %r2, %r0, 8(%r11) +; lmg %r11, %r15, 88(%r15) ; br %r14 function %atomic_rmw_xor_i64(i64, i64, i64) -> i64 { @@ -323,13 +335,13 @@ block0(v0: i64, v1: i64, v2: i16): } ; block0: -; lgr %r2, %r4 -; sllk %r4, %r3, 3 -; nill %r3, 65532 -; lrvr %r5, %r2 -; l %r0, 0(%r3) -; 0: rll %r1, %r0, 16(%r4) ; rxsbg %r1, %r5, 48, 64, 48 ; rll %r1, %r1, 16(%r4) ; cs %r0, %r1, 0(%r3) ; jglh 0b ; 1: -; rll %r3, %r0, 0(%r4) +; sllk %r2, %r3, 3 +; lgr %r5, %r3 +; nill %r5, 65532 +; lrvr %r4, %r4 +; l %r0, 0(%r5) +; 0: rll %r1, %r0, 16(%r2) ; rxsbg %r1, %r4, 48, 64, 48 ; rll %r1, %r1, 16(%r2) ; cs %r0, %r1, 0(%r5) ; jglh 0b ; 1: +; rll %r3, %r0, 0(%r2) ; lrvr %r2, %r3 ; br %r14 @@ -339,14 +351,16 @@ block0(v0: i64, v1: i64, v2: i8): return v3 } +; stmg %r11, %r15, 88(%r15) ; block0: -; lgr %r2, %r4 -; sllk %r4, %r3, 3 -; nill %r3, 65532 -; lcr %r5, %r4 -; l %r0, 0(%r3) -; 0: rll %r1, %r0, 0(%r4) ; rxsbg %r1, %r2, 32, 40, 24 ; rll %r1, %r1, 0(%r5) ; cs %r0, %r1, 0(%r3) ; jglh 0b ; 1: -; rll %r2, %r0, 8(%r4) +; sllk %r11, %r3, 3 +; lgr %r5, %r3 +; nill %r5, 65532 +; lcr %r2, %r11 +; l %r0, 0(%r5) 
+; 0: rll %r1, %r0, 0(%r11) ; rxsbg %r1, %r4, 32, 40, 24 ; rll %r1, %r1, 0(%r2) ; cs %r0, %r1, 0(%r5) ; jglh 0b ; 1: +; rll %r2, %r0, 8(%r11) +; lmg %r11, %r15, 88(%r15) ; br %r14 function %atomic_rmw_nand_i64(i64, i64, i64) -> i64 { @@ -382,13 +396,13 @@ block0(v0: i64, v1: i64, v2: i16): } ; block0: -; lgr %r2, %r4 -; sllk %r4, %r3, 3 -; nill %r3, 65532 -; lrvr %r5, %r2 -; l %r0, 0(%r3) -; 0: rll %r1, %r0, 16(%r4) ; rnsbg %r1, %r5, 48, 64, 48 ; xilf %r1, 65535 ; rll %r1, %r1, 16(%r4) ; cs %r0, %r1, 0(%r3) ; jglh 0b ; 1: -; rll %r3, %r0, 0(%r4) +; sllk %r2, %r3, 3 +; lgr %r5, %r3 +; nill %r5, 65532 +; lrvr %r4, %r4 +; l %r0, 0(%r5) +; 0: rll %r1, %r0, 16(%r2) ; rnsbg %r1, %r4, 48, 64, 48 ; xilf %r1, 65535 ; rll %r1, %r1, 16(%r2) ; cs %r0, %r1, 0(%r5) ; jglh 0b ; 1: +; rll %r3, %r0, 0(%r2) ; lrvr %r2, %r3 ; br %r14 @@ -398,14 +412,16 @@ block0(v0: i64, v1: i64, v2: i8): return v3 } +; stmg %r11, %r15, 88(%r15) ; block0: -; lgr %r2, %r4 -; sllk %r4, %r3, 3 -; nill %r3, 65532 -; lcr %r5, %r4 -; l %r0, 0(%r3) -; 0: rll %r1, %r0, 0(%r4) ; rnsbg %r1, %r2, 32, 40, 24 ; xilf %r1, 4278190080 ; rll %r1, %r1, 0(%r5) ; cs %r0, %r1, 0(%r3) ; jglh 0b ; 1: -; rll %r2, %r0, 8(%r4) +; sllk %r11, %r3, 3 +; lgr %r5, %r3 +; nill %r5, 65532 +; lcr %r2, %r11 +; l %r0, 0(%r5) +; 0: rll %r1, %r0, 0(%r11) ; rnsbg %r1, %r4, 32, 40, 24 ; xilf %r1, 4278190080 ; rll %r1, %r1, 0(%r2) ; cs %r0, %r1, 0(%r5) ; jglh 0b ; 1: +; rll %r2, %r0, 8(%r11) +; lmg %r11, %r15, 88(%r15) ; br %r14 function %atomic_rmw_smin_i64(i64, i64, i64) -> i64 { @@ -439,13 +455,13 @@ block0(v0: i64, v1: i64, v2: i16): } ; block0: -; lgr %r2, %r4 -; sllk %r4, %r3, 3 -; nill %r3, 65532 -; sllk %r5, %r2, 16 -; l %r0, 0(%r3) -; 0: rll %r1, %r0, 16(%r4) ; lrvr %r1, %r1 ; cr %r5, %r1 ; jgnl 1f ; risbgn %r1, %r5, 32, 48, 0 ; lrvr %r1, %r1 ; rll %r1, %r1, 16(%r4) ; cs %r0, %r1, 0(%r3) ; jglh 0b ; 1: -; rll %r3, %r0, 0(%r4) +; sllk %r2, %r3, 3 +; lgr %r5, %r3 +; nill %r5, 65532 +; sllk %r4, %r4, 16 +; l %r0, 0(%r5) +; 0: rll %r1, 
%r0, 16(%r2) ; lrvr %r1, %r1 ; cr %r4, %r1 ; jgnl 1f ; risbgn %r1, %r4, 32, 48, 0 ; lrvr %r1, %r1 ; rll %r1, %r1, 16(%r2) ; cs %r0, %r1, 0(%r5) ; jglh 0b ; 1: +; rll %r3, %r0, 0(%r2) ; lrvr %r2, %r3 ; br %r14 @@ -455,14 +471,17 @@ block0(v0: i64, v1: i64, v2: i8): return v3 } +; stmg %r11, %r15, 88(%r15) ; block0: -; sllk %r2, %r3, 3 -; nill %r3, 65532 -; sllk %r5, %r4, 24 -; lcr %r4, %r2 -; l %r0, 0(%r3) -; 0: rll %r1, %r0, 0(%r2) ; cr %r5, %r1 ; jgnl 1f ; risbgn %r1, %r5, 32, 40, 0 ; rll %r1, %r1, 0(%r4) ; cs %r0, %r1, 0(%r3) ; jglh 0b ; 1: -; rll %r2, %r0, 8(%r2) +; sllk %r11, %r3, 3 +; lgr %r5, %r3 +; nill %r5, 65532 +; sllk %r4, %r4, 24 +; lcr %r2, %r11 +; l %r0, 0(%r5) +; 0: rll %r1, %r0, 0(%r11) ; cr %r4, %r1 ; jgnl 1f ; risbgn %r1, %r4, 32, 40, 0 ; rll %r1, %r1, 0(%r2) ; cs %r0, %r1, 0(%r5) ; jglh 0b ; 1: +; rll %r2, %r0, 8(%r11) +; lmg %r11, %r15, 88(%r15) ; br %r14 function %atomic_rmw_smax_i64(i64, i64, i64) -> i64 { @@ -496,13 +515,13 @@ block0(v0: i64, v1: i64, v2: i16): } ; block0: -; lgr %r2, %r4 -; sllk %r4, %r3, 3 -; nill %r3, 65532 -; sllk %r5, %r2, 16 -; l %r0, 0(%r3) -; 0: rll %r1, %r0, 16(%r4) ; lrvr %r1, %r1 ; cr %r5, %r1 ; jgnh 1f ; risbgn %r1, %r5, 32, 48, 0 ; lrvr %r1, %r1 ; rll %r1, %r1, 16(%r4) ; cs %r0, %r1, 0(%r3) ; jglh 0b ; 1: -; rll %r3, %r0, 0(%r4) +; sllk %r2, %r3, 3 +; lgr %r5, %r3 +; nill %r5, 65532 +; sllk %r4, %r4, 16 +; l %r0, 0(%r5) +; 0: rll %r1, %r0, 16(%r2) ; lrvr %r1, %r1 ; cr %r4, %r1 ; jgnh 1f ; risbgn %r1, %r4, 32, 48, 0 ; lrvr %r1, %r1 ; rll %r1, %r1, 16(%r2) ; cs %r0, %r1, 0(%r5) ; jglh 0b ; 1: +; rll %r3, %r0, 0(%r2) ; lrvr %r2, %r3 ; br %r14 @@ -512,14 +531,17 @@ block0(v0: i64, v1: i64, v2: i8): return v3 } +; stmg %r11, %r15, 88(%r15) ; block0: -; sllk %r2, %r3, 3 -; nill %r3, 65532 -; sllk %r5, %r4, 24 -; lcr %r4, %r2 -; l %r0, 0(%r3) -; 0: rll %r1, %r0, 0(%r2) ; cr %r5, %r1 ; jgnh 1f ; risbgn %r1, %r5, 32, 40, 0 ; rll %r1, %r1, 0(%r4) ; cs %r0, %r1, 0(%r3) ; jglh 0b ; 1: -; rll %r2, %r0, 8(%r2) +; sllk %r11, 
%r3, 3 +; lgr %r5, %r3 +; nill %r5, 65532 +; sllk %r4, %r4, 24 +; lcr %r2, %r11 +; l %r0, 0(%r5) +; 0: rll %r1, %r0, 0(%r11) ; cr %r4, %r1 ; jgnh 1f ; risbgn %r1, %r4, 32, 40, 0 ; rll %r1, %r1, 0(%r2) ; cs %r0, %r1, 0(%r5) ; jglh 0b ; 1: +; rll %r2, %r0, 8(%r11) +; lmg %r11, %r15, 88(%r15) ; br %r14 function %atomic_rmw_umin_i64(i64, i64, i64) -> i64 { @@ -553,13 +575,13 @@ block0(v0: i64, v1: i64, v2: i16): } ; block0: -; lgr %r2, %r4 -; sllk %r4, %r3, 3 -; nill %r3, 65532 -; sllk %r5, %r2, 16 -; l %r0, 0(%r3) -; 0: rll %r1, %r0, 16(%r4) ; lrvr %r1, %r1 ; clr %r5, %r1 ; jgnl 1f ; risbgn %r1, %r5, 32, 48, 0 ; lrvr %r1, %r1 ; rll %r1, %r1, 16(%r4) ; cs %r0, %r1, 0(%r3) ; jglh 0b ; 1: -; rll %r3, %r0, 0(%r4) +; sllk %r2, %r3, 3 +; lgr %r5, %r3 +; nill %r5, 65532 +; sllk %r4, %r4, 16 +; l %r0, 0(%r5) +; 0: rll %r1, %r0, 16(%r2) ; lrvr %r1, %r1 ; clr %r4, %r1 ; jgnl 1f ; risbgn %r1, %r4, 32, 48, 0 ; lrvr %r1, %r1 ; rll %r1, %r1, 16(%r2) ; cs %r0, %r1, 0(%r5) ; jglh 0b ; 1: +; rll %r3, %r0, 0(%r2) ; lrvr %r2, %r3 ; br %r14 @@ -569,14 +591,17 @@ block0(v0: i64, v1: i64, v2: i8): return v3 } +; stmg %r11, %r15, 88(%r15) ; block0: -; sllk %r2, %r3, 3 -; nill %r3, 65532 -; sllk %r5, %r4, 24 -; lcr %r4, %r2 -; l %r0, 0(%r3) -; 0: rll %r1, %r0, 0(%r2) ; clr %r5, %r1 ; jgnl 1f ; risbgn %r1, %r5, 32, 40, 0 ; rll %r1, %r1, 0(%r4) ; cs %r0, %r1, 0(%r3) ; jglh 0b ; 1: -; rll %r2, %r0, 8(%r2) +; sllk %r11, %r3, 3 +; lgr %r5, %r3 +; nill %r5, 65532 +; sllk %r4, %r4, 24 +; lcr %r2, %r11 +; l %r0, 0(%r5) +; 0: rll %r1, %r0, 0(%r11) ; clr %r4, %r1 ; jgnl 1f ; risbgn %r1, %r4, 32, 40, 0 ; rll %r1, %r1, 0(%r2) ; cs %r0, %r1, 0(%r5) ; jglh 0b ; 1: +; rll %r2, %r0, 8(%r11) +; lmg %r11, %r15, 88(%r15) ; br %r14 function %atomic_rmw_umax_i64(i64, i64, i64) -> i64 { @@ -610,13 +635,13 @@ block0(v0: i64, v1: i64, v2: i16): } ; block0: -; lgr %r2, %r4 -; sllk %r4, %r3, 3 -; nill %r3, 65532 -; sllk %r5, %r2, 16 -; l %r0, 0(%r3) -; 0: rll %r1, %r0, 16(%r4) ; lrvr %r1, %r1 ; clr %r5, %r1 ; jgnh 1f 
; risbgn %r1, %r5, 32, 48, 0 ; lrvr %r1, %r1 ; rll %r1, %r1, 16(%r4) ; cs %r0, %r1, 0(%r3) ; jglh 0b ; 1: -; rll %r3, %r0, 0(%r4) +; sllk %r2, %r3, 3 +; lgr %r5, %r3 +; nill %r5, 65532 +; sllk %r4, %r4, 16 +; l %r0, 0(%r5) +; 0: rll %r1, %r0, 16(%r2) ; lrvr %r1, %r1 ; clr %r4, %r1 ; jgnh 1f ; risbgn %r1, %r4, 32, 48, 0 ; lrvr %r1, %r1 ; rll %r1, %r1, 16(%r2) ; cs %r0, %r1, 0(%r5) ; jglh 0b ; 1: +; rll %r3, %r0, 0(%r2) ; lrvr %r2, %r3 ; br %r14 @@ -626,13 +651,16 @@ block0(v0: i64, v1: i64, v2: i8): return v3 } +; stmg %r11, %r15, 88(%r15) ; block0: -; sllk %r2, %r3, 3 -; nill %r3, 65532 -; sllk %r5, %r4, 24 -; lcr %r4, %r2 -; l %r0, 0(%r3) -; 0: rll %r1, %r0, 0(%r2) ; clr %r5, %r1 ; jgnh 1f ; risbgn %r1, %r5, 32, 40, 0 ; rll %r1, %r1, 0(%r4) ; cs %r0, %r1, 0(%r3) ; jglh 0b ; 1: -; rll %r2, %r0, 8(%r2) +; sllk %r11, %r3, 3 +; lgr %r5, %r3 +; nill %r5, 65532 +; sllk %r4, %r4, 24 +; lcr %r2, %r11 +; l %r0, 0(%r5) +; 0: rll %r1, %r0, 0(%r11) ; clr %r4, %r1 ; jgnh 1f ; risbgn %r1, %r4, 32, 40, 0 ; rll %r1, %r1, 0(%r2) ; cs %r0, %r1, 0(%r5) ; jglh 0b ; 1: +; rll %r2, %r0, 8(%r11) +; lmg %r11, %r15, 88(%r15) ; br %r14 diff --git a/cranelift/filetests/filetests/isa/s390x/atomic_rmw.clif b/cranelift/filetests/filetests/isa/s390x/atomic_rmw.clif index f1bf6f23dc..f95940aae7 100644 --- a/cranelift/filetests/filetests/isa/s390x/atomic_rmw.clif +++ b/cranelift/filetests/filetests/isa/s390x/atomic_rmw.clif @@ -36,12 +36,12 @@ block0(v0: i64, v1: i64, v2: i16): } ; block0: -; lgr %r5, %r4 -; sllk %r4, %r3, 3 -; nill %r3, 65532 -; l %r0, 0(%r3) -; 0: rll %r1, %r0, 0(%r4) ; risbgn %r1, %r5, 32, 48, 16 ; rll %r1, %r1, 0(%r4) ; cs %r0, %r1, 0(%r3) ; jglh 0b ; 1: -; rll %r2, %r0, 16(%r4) +; sllk %r2, %r3, 3 +; lgr %r5, %r3 +; nill %r5, 65532 +; l %r0, 0(%r5) +; 0: rll %r1, %r0, 0(%r2) ; risbgn %r1, %r4, 32, 48, 16 ; rll %r1, %r1, 0(%r2) ; cs %r0, %r1, 0(%r5) ; jglh 0b ; 1: +; rll %r2, %r0, 16(%r2) ; br %r14 function %atomic_rmw_xchg_i8(i64, i64, i8) -> i8 { @@ -50,14 +50,16 @@ 
block0(v0: i64, v1: i64, v2: i8): return v3 } +; stmg %r11, %r15, 88(%r15) ; block0: -; lgr %r2, %r4 -; sllk %r4, %r3, 3 -; nill %r3, 65532 -; lcr %r5, %r4 -; l %r0, 0(%r3) -; 0: rll %r1, %r0, 0(%r4) ; risbgn %r1, %r2, 32, 40, 24 ; rll %r1, %r1, 0(%r5) ; cs %r0, %r1, 0(%r3) ; jglh 0b ; 1: -; rll %r2, %r0, 8(%r4) +; sllk %r11, %r3, 3 +; lgr %r5, %r3 +; nill %r5, 65532 +; lcr %r2, %r11 +; l %r0, 0(%r5) +; 0: rll %r1, %r0, 0(%r11) ; risbgn %r1, %r4, 32, 40, 24 ; rll %r1, %r1, 0(%r2) ; cs %r0, %r1, 0(%r5) ; jglh 0b ; 1: +; rll %r2, %r0, 8(%r11) +; lmg %r11, %r15, 88(%r15) ; br %r14 function %atomic_rmw_add_i64(i64, i64) -> i64 { @@ -87,13 +89,13 @@ block0(v0: i64, v1: i64, v2: i16): } ; block0: -; lgr %r5, %r4 -; sllk %r4, %r3, 3 -; nill %r3, 65532 -; sllk %r5, %r5, 16 -; l %r0, 0(%r3) -; 0: rll %r1, %r0, 0(%r4) ; ar %r1, %r5 ; rll %r1, %r1, 0(%r4) ; cs %r0, %r1, 0(%r3) ; jglh 0b ; 1: -; rll %r2, %r0, 16(%r4) +; sllk %r2, %r3, 3 +; lgr %r5, %r3 +; nill %r5, 65532 +; sllk %r4, %r4, 16 +; l %r0, 0(%r5) +; 0: rll %r1, %r0, 0(%r2) ; ar %r1, %r4 ; rll %r1, %r1, 0(%r2) ; cs %r0, %r1, 0(%r5) ; jglh 0b ; 1: +; rll %r2, %r0, 16(%r2) ; br %r14 function %atomic_rmw_add_i8(i64, i64, i8) -> i8 { @@ -102,14 +104,17 @@ block0(v0: i64, v1: i64, v2: i8): return v3 } +; stmg %r11, %r15, 88(%r15) ; block0: -; sllk %r2, %r3, 3 -; nill %r3, 65532 -; sllk %r5, %r4, 24 -; lcr %r4, %r2 -; l %r0, 0(%r3) -; 0: rll %r1, %r0, 0(%r2) ; ar %r1, %r5 ; rll %r1, %r1, 0(%r4) ; cs %r0, %r1, 0(%r3) ; jglh 0b ; 1: -; rll %r2, %r0, 8(%r2) +; sllk %r11, %r3, 3 +; lgr %r5, %r3 +; nill %r5, 65532 +; sllk %r4, %r4, 24 +; lcr %r2, %r11 +; l %r0, 0(%r5) +; 0: rll %r1, %r0, 0(%r11) ; ar %r1, %r4 ; rll %r1, %r1, 0(%r2) ; cs %r0, %r1, 0(%r5) ; jglh 0b ; 1: +; rll %r2, %r0, 8(%r11) +; lmg %r11, %r15, 88(%r15) ; br %r14 function %atomic_rmw_sub_i64(i64, i64) -> i64 { @@ -141,13 +146,13 @@ block0(v0: i64, v1: i64, v2: i16): } ; block0: -; lgr %r5, %r4 -; sllk %r4, %r3, 3 -; nill %r3, 65532 -; sllk %r5, %r5, 16 -; l 
%r0, 0(%r3) -; 0: rll %r1, %r0, 0(%r4) ; sr %r1, %r5 ; rll %r1, %r1, 0(%r4) ; cs %r0, %r1, 0(%r3) ; jglh 0b ; 1: -; rll %r2, %r0, 16(%r4) +; sllk %r2, %r3, 3 +; lgr %r5, %r3 +; nill %r5, 65532 +; sllk %r4, %r4, 16 +; l %r0, 0(%r5) +; 0: rll %r1, %r0, 0(%r2) ; sr %r1, %r4 ; rll %r1, %r1, 0(%r2) ; cs %r0, %r1, 0(%r5) ; jglh 0b ; 1: +; rll %r2, %r0, 16(%r2) ; br %r14 function %atomic_rmw_sub_i8(i64, i64, i8) -> i8 { @@ -156,14 +161,17 @@ block0(v0: i64, v1: i64, v2: i8): return v3 } +; stmg %r11, %r15, 88(%r15) ; block0: -; sllk %r2, %r3, 3 -; nill %r3, 65532 -; sllk %r5, %r4, 24 -; lcr %r4, %r2 -; l %r0, 0(%r3) -; 0: rll %r1, %r0, 0(%r2) ; sr %r1, %r5 ; rll %r1, %r1, 0(%r4) ; cs %r0, %r1, 0(%r3) ; jglh 0b ; 1: -; rll %r2, %r0, 8(%r2) +; sllk %r11, %r3, 3 +; lgr %r5, %r3 +; nill %r5, 65532 +; sllk %r4, %r4, 24 +; lcr %r2, %r11 +; l %r0, 0(%r5) +; 0: rll %r1, %r0, 0(%r11) ; sr %r1, %r4 ; rll %r1, %r1, 0(%r2) ; cs %r0, %r1, 0(%r5) ; jglh 0b ; 1: +; rll %r2, %r0, 8(%r11) +; lmg %r11, %r15, 88(%r15) ; br %r14 function %atomic_rmw_and_i64(i64, i64) -> i64 { @@ -193,12 +201,12 @@ block0(v0: i64, v1: i64, v2: i16): } ; block0: -; lgr %r5, %r4 -; sllk %r4, %r3, 3 -; nill %r3, 65532 -; l %r0, 0(%r3) -; 0: rll %r1, %r0, 0(%r4) ; rnsbg %r1, %r5, 32, 48, 16 ; rll %r1, %r1, 0(%r4) ; cs %r0, %r1, 0(%r3) ; jglh 0b ; 1: -; rll %r2, %r0, 16(%r4) +; sllk %r2, %r3, 3 +; lgr %r5, %r3 +; nill %r5, 65532 +; l %r0, 0(%r5) +; 0: rll %r1, %r0, 0(%r2) ; rnsbg %r1, %r4, 32, 48, 16 ; rll %r1, %r1, 0(%r2) ; cs %r0, %r1, 0(%r5) ; jglh 0b ; 1: +; rll %r2, %r0, 16(%r2) ; br %r14 function %atomic_rmw_and_i8(i64, i64, i8) -> i8 { @@ -207,14 +215,16 @@ block0(v0: i64, v1: i64, v2: i8): return v3 } +; stmg %r11, %r15, 88(%r15) ; block0: -; lgr %r2, %r4 -; sllk %r4, %r3, 3 -; nill %r3, 65532 -; lcr %r5, %r4 -; l %r0, 0(%r3) -; 0: rll %r1, %r0, 0(%r4) ; rnsbg %r1, %r2, 32, 40, 24 ; rll %r1, %r1, 0(%r5) ; cs %r0, %r1, 0(%r3) ; jglh 0b ; 1: -; rll %r2, %r0, 8(%r4) +; sllk %r11, %r3, 3 +; lgr %r5, %r3 +; 
nill %r5, 65532 +; lcr %r2, %r11 +; l %r0, 0(%r5) +; 0: rll %r1, %r0, 0(%r11) ; rnsbg %r1, %r4, 32, 40, 24 ; rll %r1, %r1, 0(%r2) ; cs %r0, %r1, 0(%r5) ; jglh 0b ; 1: +; rll %r2, %r0, 8(%r11) +; lmg %r11, %r15, 88(%r15) ; br %r14 function %atomic_rmw_or_i64(i64, i64) -> i64 { @@ -244,12 +254,12 @@ block0(v0: i64, v1: i64, v2: i16): } ; block0: -; lgr %r5, %r4 -; sllk %r4, %r3, 3 -; nill %r3, 65532 -; l %r0, 0(%r3) -; 0: rll %r1, %r0, 0(%r4) ; rosbg %r1, %r5, 32, 48, 16 ; rll %r1, %r1, 0(%r4) ; cs %r0, %r1, 0(%r3) ; jglh 0b ; 1: -; rll %r2, %r0, 16(%r4) +; sllk %r2, %r3, 3 +; lgr %r5, %r3 +; nill %r5, 65532 +; l %r0, 0(%r5) +; 0: rll %r1, %r0, 0(%r2) ; rosbg %r1, %r4, 32, 48, 16 ; rll %r1, %r1, 0(%r2) ; cs %r0, %r1, 0(%r5) ; jglh 0b ; 1: +; rll %r2, %r0, 16(%r2) ; br %r14 function %atomic_rmw_or_i8(i64, i64, i8) -> i8 { @@ -258,14 +268,16 @@ block0(v0: i64, v1: i64, v2: i8): return v3 } +; stmg %r11, %r15, 88(%r15) ; block0: -; lgr %r2, %r4 -; sllk %r4, %r3, 3 -; nill %r3, 65532 -; lcr %r5, %r4 -; l %r0, 0(%r3) -; 0: rll %r1, %r0, 0(%r4) ; rosbg %r1, %r2, 32, 40, 24 ; rll %r1, %r1, 0(%r5) ; cs %r0, %r1, 0(%r3) ; jglh 0b ; 1: -; rll %r2, %r0, 8(%r4) +; sllk %r11, %r3, 3 +; lgr %r5, %r3 +; nill %r5, 65532 +; lcr %r2, %r11 +; l %r0, 0(%r5) +; 0: rll %r1, %r0, 0(%r11) ; rosbg %r1, %r4, 32, 40, 24 ; rll %r1, %r1, 0(%r2) ; cs %r0, %r1, 0(%r5) ; jglh 0b ; 1: +; rll %r2, %r0, 8(%r11) +; lmg %r11, %r15, 88(%r15) ; br %r14 function %atomic_rmw_xor_i64(i64, i64) -> i64 { @@ -295,12 +307,12 @@ block0(v0: i64, v1: i64, v2: i16): } ; block0: -; lgr %r5, %r4 -; sllk %r4, %r3, 3 -; nill %r3, 65532 -; l %r0, 0(%r3) -; 0: rll %r1, %r0, 0(%r4) ; rxsbg %r1, %r5, 32, 48, 16 ; rll %r1, %r1, 0(%r4) ; cs %r0, %r1, 0(%r3) ; jglh 0b ; 1: -; rll %r2, %r0, 16(%r4) +; sllk %r2, %r3, 3 +; lgr %r5, %r3 +; nill %r5, 65532 +; l %r0, 0(%r5) +; 0: rll %r1, %r0, 0(%r2) ; rxsbg %r1, %r4, 32, 48, 16 ; rll %r1, %r1, 0(%r2) ; cs %r0, %r1, 0(%r5) ; jglh 0b ; 1: +; rll %r2, %r0, 16(%r2) ; br %r14 function 
%atomic_rmw_xor_i8(i64, i64, i8) -> i8 { @@ -309,14 +321,16 @@ block0(v0: i64, v1: i64, v2: i8): return v3 } +; stmg %r11, %r15, 88(%r15) ; block0: -; lgr %r2, %r4 -; sllk %r4, %r3, 3 -; nill %r3, 65532 -; lcr %r5, %r4 -; l %r0, 0(%r3) -; 0: rll %r1, %r0, 0(%r4) ; rxsbg %r1, %r2, 32, 40, 24 ; rll %r1, %r1, 0(%r5) ; cs %r0, %r1, 0(%r3) ; jglh 0b ; 1: -; rll %r2, %r0, 8(%r4) +; sllk %r11, %r3, 3 +; lgr %r5, %r3 +; nill %r5, 65532 +; lcr %r2, %r11 +; l %r0, 0(%r5) +; 0: rll %r1, %r0, 0(%r11) ; rxsbg %r1, %r4, 32, 40, 24 ; rll %r1, %r1, 0(%r2) ; cs %r0, %r1, 0(%r5) ; jglh 0b ; 1: +; rll %r2, %r0, 8(%r11) +; lmg %r11, %r15, 88(%r15) ; br %r14 function %atomic_rmw_nand_i64(i64, i64, i64) -> i64 { @@ -350,12 +364,12 @@ block0(v0: i64, v1: i64, v2: i16): } ; block0: -; lgr %r5, %r4 -; sllk %r4, %r3, 3 -; nill %r3, 65532 -; l %r0, 0(%r3) -; 0: rll %r1, %r0, 0(%r4) ; rnsbg %r1, %r5, 32, 48, 16 ; xilf %r1, 4294901760 ; rll %r1, %r1, 0(%r4) ; cs %r0, %r1, 0(%r3) ; jglh 0b ; 1: -; rll %r2, %r0, 16(%r4) +; sllk %r2, %r3, 3 +; lgr %r5, %r3 +; nill %r5, 65532 +; l %r0, 0(%r5) +; 0: rll %r1, %r0, 0(%r2) ; rnsbg %r1, %r4, 32, 48, 16 ; xilf %r1, 4294901760 ; rll %r1, %r1, 0(%r2) ; cs %r0, %r1, 0(%r5) ; jglh 0b ; 1: +; rll %r2, %r0, 16(%r2) ; br %r14 function %atomic_rmw_nand_i8(i64, i64, i8) -> i8 { @@ -364,14 +378,16 @@ block0(v0: i64, v1: i64, v2: i8): return v3 } +; stmg %r11, %r15, 88(%r15) ; block0: -; lgr %r2, %r4 -; sllk %r4, %r3, 3 -; nill %r3, 65532 -; lcr %r5, %r4 -; l %r0, 0(%r3) -; 0: rll %r1, %r0, 0(%r4) ; rnsbg %r1, %r2, 32, 40, 24 ; xilf %r1, 4278190080 ; rll %r1, %r1, 0(%r5) ; cs %r0, %r1, 0(%r3) ; jglh 0b ; 1: -; rll %r2, %r0, 8(%r4) +; sllk %r11, %r3, 3 +; lgr %r5, %r3 +; nill %r5, 65532 +; lcr %r2, %r11 +; l %r0, 0(%r5) +; 0: rll %r1, %r0, 0(%r11) ; rnsbg %r1, %r4, 32, 40, 24 ; xilf %r1, 4278190080 ; rll %r1, %r1, 0(%r2) ; cs %r0, %r1, 0(%r5) ; jglh 0b ; 1: +; rll %r2, %r0, 8(%r11) +; lmg %r11, %r15, 88(%r15) ; br %r14 function %atomic_rmw_smin_i64(i64, i64, i64) 
-> i64 { @@ -405,13 +421,13 @@ block0(v0: i64, v1: i64, v2: i16): } ; block0: -; lgr %r5, %r4 -; sllk %r4, %r3, 3 -; nill %r3, 65532 -; sllk %r5, %r5, 16 -; l %r0, 0(%r3) -; 0: rll %r1, %r0, 0(%r4) ; cr %r5, %r1 ; jgnl 1f ; risbgn %r1, %r5, 32, 48, 0 ; rll %r1, %r1, 0(%r4) ; cs %r0, %r1, 0(%r3) ; jglh 0b ; 1: -; rll %r2, %r0, 16(%r4) +; sllk %r2, %r3, 3 +; lgr %r5, %r3 +; nill %r5, 65532 +; sllk %r4, %r4, 16 +; l %r0, 0(%r5) +; 0: rll %r1, %r0, 0(%r2) ; cr %r4, %r1 ; jgnl 1f ; risbgn %r1, %r4, 32, 48, 0 ; rll %r1, %r1, 0(%r2) ; cs %r0, %r1, 0(%r5) ; jglh 0b ; 1: +; rll %r2, %r0, 16(%r2) ; br %r14 function %atomic_rmw_smin_i8(i64, i64, i8) -> i8 { @@ -420,14 +436,17 @@ block0(v0: i64, v1: i64, v2: i8): return v3 } +; stmg %r11, %r15, 88(%r15) ; block0: -; sllk %r2, %r3, 3 -; nill %r3, 65532 -; sllk %r5, %r4, 24 -; lcr %r4, %r2 -; l %r0, 0(%r3) -; 0: rll %r1, %r0, 0(%r2) ; cr %r5, %r1 ; jgnl 1f ; risbgn %r1, %r5, 32, 40, 0 ; rll %r1, %r1, 0(%r4) ; cs %r0, %r1, 0(%r3) ; jglh 0b ; 1: -; rll %r2, %r0, 8(%r2) +; sllk %r11, %r3, 3 +; lgr %r5, %r3 +; nill %r5, 65532 +; sllk %r4, %r4, 24 +; lcr %r2, %r11 +; l %r0, 0(%r5) +; 0: rll %r1, %r0, 0(%r11) ; cr %r4, %r1 ; jgnl 1f ; risbgn %r1, %r4, 32, 40, 0 ; rll %r1, %r1, 0(%r2) ; cs %r0, %r1, 0(%r5) ; jglh 0b ; 1: +; rll %r2, %r0, 8(%r11) +; lmg %r11, %r15, 88(%r15) ; br %r14 function %atomic_rmw_smax_i64(i64, i64, i64) -> i64 { @@ -461,13 +480,13 @@ block0(v0: i64, v1: i64, v2: i16): } ; block0: -; lgr %r5, %r4 -; sllk %r4, %r3, 3 -; nill %r3, 65532 -; sllk %r5, %r5, 16 -; l %r0, 0(%r3) -; 0: rll %r1, %r0, 0(%r4) ; cr %r5, %r1 ; jgnh 1f ; risbgn %r1, %r5, 32, 48, 0 ; rll %r1, %r1, 0(%r4) ; cs %r0, %r1, 0(%r3) ; jglh 0b ; 1: -; rll %r2, %r0, 16(%r4) +; sllk %r2, %r3, 3 +; lgr %r5, %r3 +; nill %r5, 65532 +; sllk %r4, %r4, 16 +; l %r0, 0(%r5) +; 0: rll %r1, %r0, 0(%r2) ; cr %r4, %r1 ; jgnh 1f ; risbgn %r1, %r4, 32, 48, 0 ; rll %r1, %r1, 0(%r2) ; cs %r0, %r1, 0(%r5) ; jglh 0b ; 1: +; rll %r2, %r0, 16(%r2) ; br %r14 function 
%atomic_rmw_smax_i8(i64, i64, i8) -> i8 { @@ -476,14 +495,17 @@ block0(v0: i64, v1: i64, v2: i8): return v3 } +; stmg %r11, %r15, 88(%r15) ; block0: -; sllk %r2, %r3, 3 -; nill %r3, 65532 -; sllk %r5, %r4, 24 -; lcr %r4, %r2 -; l %r0, 0(%r3) -; 0: rll %r1, %r0, 0(%r2) ; cr %r5, %r1 ; jgnh 1f ; risbgn %r1, %r5, 32, 40, 0 ; rll %r1, %r1, 0(%r4) ; cs %r0, %r1, 0(%r3) ; jglh 0b ; 1: -; rll %r2, %r0, 8(%r2) +; sllk %r11, %r3, 3 +; lgr %r5, %r3 +; nill %r5, 65532 +; sllk %r4, %r4, 24 +; lcr %r2, %r11 +; l %r0, 0(%r5) +; 0: rll %r1, %r0, 0(%r11) ; cr %r4, %r1 ; jgnh 1f ; risbgn %r1, %r4, 32, 40, 0 ; rll %r1, %r1, 0(%r2) ; cs %r0, %r1, 0(%r5) ; jglh 0b ; 1: +; rll %r2, %r0, 8(%r11) +; lmg %r11, %r15, 88(%r15) ; br %r14 function %atomic_rmw_umin_i64(i64, i64, i64) -> i64 { @@ -517,13 +539,13 @@ block0(v0: i64, v1: i64, v2: i16): } ; block0: -; lgr %r5, %r4 -; sllk %r4, %r3, 3 -; nill %r3, 65532 -; sllk %r5, %r5, 16 -; l %r0, 0(%r3) -; 0: rll %r1, %r0, 0(%r4) ; clr %r5, %r1 ; jgnl 1f ; risbgn %r1, %r5, 32, 48, 0 ; rll %r1, %r1, 0(%r4) ; cs %r0, %r1, 0(%r3) ; jglh 0b ; 1: -; rll %r2, %r0, 16(%r4) +; sllk %r2, %r3, 3 +; lgr %r5, %r3 +; nill %r5, 65532 +; sllk %r4, %r4, 16 +; l %r0, 0(%r5) +; 0: rll %r1, %r0, 0(%r2) ; clr %r4, %r1 ; jgnl 1f ; risbgn %r1, %r4, 32, 48, 0 ; rll %r1, %r1, 0(%r2) ; cs %r0, %r1, 0(%r5) ; jglh 0b ; 1: +; rll %r2, %r0, 16(%r2) ; br %r14 function %atomic_rmw_umin_i8(i64, i64, i8) -> i8 { @@ -532,14 +554,17 @@ block0(v0: i64, v1: i64, v2: i8): return v3 } +; stmg %r11, %r15, 88(%r15) ; block0: -; sllk %r2, %r3, 3 -; nill %r3, 65532 -; sllk %r5, %r4, 24 -; lcr %r4, %r2 -; l %r0, 0(%r3) -; 0: rll %r1, %r0, 0(%r2) ; clr %r5, %r1 ; jgnl 1f ; risbgn %r1, %r5, 32, 40, 0 ; rll %r1, %r1, 0(%r4) ; cs %r0, %r1, 0(%r3) ; jglh 0b ; 1: -; rll %r2, %r0, 8(%r2) +; sllk %r11, %r3, 3 +; lgr %r5, %r3 +; nill %r5, 65532 +; sllk %r4, %r4, 24 +; lcr %r2, %r11 +; l %r0, 0(%r5) +; 0: rll %r1, %r0, 0(%r11) ; clr %r4, %r1 ; jgnl 1f ; risbgn %r1, %r4, 32, 40, 0 ; rll %r1, %r1, 
0(%r2) ; cs %r0, %r1, 0(%r5) ; jglh 0b ; 1: +; rll %r2, %r0, 8(%r11) +; lmg %r11, %r15, 88(%r15) ; br %r14 function %atomic_rmw_umax_i64(i64, i64, i64) -> i64 { @@ -573,13 +598,13 @@ block0(v0: i64, v1: i64, v2: i16): } ; block0: -; lgr %r5, %r4 -; sllk %r4, %r3, 3 -; nill %r3, 65532 -; sllk %r5, %r5, 16 -; l %r0, 0(%r3) -; 0: rll %r1, %r0, 0(%r4) ; clr %r5, %r1 ; jgnh 1f ; risbgn %r1, %r5, 32, 48, 0 ; rll %r1, %r1, 0(%r4) ; cs %r0, %r1, 0(%r3) ; jglh 0b ; 1: -; rll %r2, %r0, 16(%r4) +; sllk %r2, %r3, 3 +; lgr %r5, %r3 +; nill %r5, 65532 +; sllk %r4, %r4, 16 +; l %r0, 0(%r5) +; 0: rll %r1, %r0, 0(%r2) ; clr %r4, %r1 ; jgnh 1f ; risbgn %r1, %r4, 32, 48, 0 ; rll %r1, %r1, 0(%r2) ; cs %r0, %r1, 0(%r5) ; jglh 0b ; 1: +; rll %r2, %r0, 16(%r2) ; br %r14 function %atomic_rmw_umax_i8(i64, i64, i8) -> i8 { @@ -588,13 +613,16 @@ block0(v0: i64, v1: i64, v2: i8): return v3 } +; stmg %r11, %r15, 88(%r15) ; block0: -; sllk %r2, %r3, 3 -; nill %r3, 65532 -; sllk %r5, %r4, 24 -; lcr %r4, %r2 -; l %r0, 0(%r3) -; 0: rll %r1, %r0, 0(%r2) ; clr %r5, %r1 ; jgnh 1f ; risbgn %r1, %r5, 32, 40, 0 ; rll %r1, %r1, 0(%r4) ; cs %r0, %r1, 0(%r3) ; jglh 0b ; 1: -; rll %r2, %r0, 8(%r2) +; sllk %r11, %r3, 3 +; lgr %r5, %r3 +; nill %r5, 65532 +; sllk %r4, %r4, 24 +; lcr %r2, %r11 +; l %r0, 0(%r5) +; 0: rll %r1, %r0, 0(%r11) ; clr %r4, %r1 ; jgnh 1f ; risbgn %r1, %r4, 32, 40, 0 ; rll %r1, %r1, 0(%r2) ; cs %r0, %r1, 0(%r5) ; jglh 0b ; 1: +; rll %r2, %r0, 8(%r11) +; lmg %r11, %r15, 88(%r15) ; br %r14 diff --git a/cranelift/filetests/filetests/isa/s390x/bitops.clif b/cranelift/filetests/filetests/isa/s390x/bitops.clif index 6ea451147d..2ea8f3b5a5 100644 --- a/cranelift/filetests/filetests/isa/s390x/bitops.clif +++ b/cranelift/filetests/filetests/isa/s390x/bitops.clif @@ -347,9 +347,10 @@ block0(v0: i32): } ; block0: -; oihl %r2, 1 -; lcgr %r3, %r2 -; ngrk %r5, %r2, %r3 +; lgr %r5, %r2 +; oihl %r5, 1 +; lcgr %r3, %r5 +; ngr %r5, %r3 ; flogr %r0, %r5 ; lhi %r4, 63 ; srk %r2, %r4, %r0 @@ -362,9 +363,10 
@@ block0(v0: i16): } ; block0: -; oilh %r2, 1 -; lcgr %r3, %r2 -; ngrk %r5, %r2, %r3 +; lgr %r5, %r2 +; oilh %r5, 1 +; lcgr %r3, %r5 +; ngr %r5, %r3 ; flogr %r0, %r5 ; lhi %r4, 63 ; srk %r2, %r4, %r0 @@ -377,9 +379,10 @@ block0(v0: i8): } ; block0: -; oill %r2, 256 -; lcgr %r3, %r2 -; ngrk %r5, %r2, %r3 +; lgr %r5, %r2 +; oill %r5, 256 +; lcgr %r3, %r5 +; ngr %r5, %r3 ; flogr %r0, %r5 ; lhi %r4, 63 ; srk %r2, %r4, %r0 diff --git a/cranelift/filetests/filetests/isa/s390x/bitwise.clif b/cranelift/filetests/filetests/isa/s390x/bitwise.clif index affc376914..8ea3b5bbb0 100644 --- a/cranelift/filetests/filetests/isa/s390x/bitwise.clif +++ b/cranelift/filetests/filetests/isa/s390x/bitwise.clif @@ -597,8 +597,9 @@ block0(v0: i32, v1: i32, v2: i32): ; block0: ; nr %r3, %r2 -; xilf %r2, 4294967295 -; nr %r4, %r2 +; lgr %r5, %r2 +; xilf %r5, 4294967295 +; nr %r4, %r5 ; ork %r2, %r4, %r3 ; br %r14 @@ -610,8 +611,9 @@ block0(v0: i16, v1: i16, v2: i16): ; block0: ; nr %r3, %r2 -; xilf %r2, 4294967295 -; nr %r4, %r2 +; lgr %r5, %r2 +; xilf %r5, 4294967295 +; nr %r4, %r5 ; ork %r2, %r4, %r3 ; br %r14 @@ -623,8 +625,9 @@ block0(v0: i8, v1: i8, v2: i8): ; block0: ; nr %r3, %r2 -; xilf %r2, 4294967295 -; nr %r4, %r2 +; lgr %r5, %r2 +; xilf %r5, 4294967295 +; nr %r4, %r5 ; ork %r2, %r4, %r3 ; br %r14 diff --git a/cranelift/filetests/filetests/isa/s390x/call.clif b/cranelift/filetests/filetests/isa/s390x/call.clif index dbfe73b088..7dfd72f031 100644 --- a/cranelift/filetests/filetests/isa/s390x/call.clif +++ b/cranelift/filetests/filetests/isa/s390x/call.clif @@ -46,7 +46,8 @@ block0(v0: i32): } ; block0: -; llgfr %r2, %r2 +; lgr %r5, %r2 +; llgfr %r2, %r5 ; br %r14 function %call_uext(i32) -> i64 { @@ -73,7 +74,8 @@ block0(v0: i32): } ; block0: -; lgfr %r2, %r2 +; lgr %r5, %r2 +; lgfr %r2, %r5 ; br %r14 function %call_colocated(i64) -> i64 { diff --git a/cranelift/filetests/filetests/isa/s390x/shift-rotate.clif b/cranelift/filetests/filetests/isa/s390x/shift-rotate.clif index 
4163806b99..57a118a7db 100644 --- a/cranelift/filetests/filetests/isa/s390x/shift-rotate.clif +++ b/cranelift/filetests/filetests/isa/s390x/shift-rotate.clif @@ -153,13 +153,14 @@ block0(v0: i16, v1: i16): } ; block0: +; lgr %r4, %r3 ; llhr %r2, %r2 -; lcr %r4, %r3 -; nill %r3, 15 +; lcr %r3, %r4 ; nill %r4, 15 -; sllk %r4, %r2, 0(%r4) -; srlk %r5, %r2, 0(%r3) -; ork %r2, %r4, %r5 +; nill %r3, 15 +; sllk %r3, %r2, 0(%r3) +; srlk %r4, %r2, 0(%r4) +; ork %r2, %r3, %r4 ; br %r14 function %rotr_i16_imm(i16) -> i16 { @@ -201,13 +202,14 @@ block0(v0: i8, v1: i8): } ; block0: +; lgr %r4, %r3 ; llcr %r2, %r2 -; lcr %r4, %r3 -; nill %r3, 7 +; lcr %r3, %r4 ; nill %r4, 7 -; sllk %r4, %r2, 0(%r4) -; srlk %r5, %r2, 0(%r3) -; ork %r2, %r4, %r5 +; nill %r3, 7 +; sllk %r3, %r2, 0(%r3) +; srlk %r4, %r2, 0(%r4) +; ork %r2, %r3, %r4 ; br %r14 function %rotr_i8_imm(i8) -> i8 { @@ -578,8 +580,9 @@ block0(v0: i16, v1: i16): ; block0: ; llhr %r2, %r2 -; nill %r3, 15 -; srlk %r2, %r2, 0(%r3) +; lgr %r4, %r3 +; nill %r4, 15 +; srlk %r2, %r2, 0(%r4) ; br %r14 function %ushr_i16_imm(i16) -> i16 { @@ -616,8 +619,9 @@ block0(v0: i8, v1: i8): ; block0: ; llcr %r2, %r2 -; nill %r3, 7 -; srlk %r2, %r2, 0(%r3) +; lgr %r4, %r3 +; nill %r4, 7 +; srlk %r2, %r2, 0(%r4) ; br %r14 function %ushr_i8_imm(i8) -> i8 { @@ -950,8 +954,9 @@ block0(v0: i16, v1: i16): ; block0: ; lhr %r2, %r2 -; nill %r3, 15 -; srak %r2, %r2, 0(%r3) +; lgr %r4, %r3 +; nill %r4, 15 +; srak %r2, %r2, 0(%r4) ; br %r14 function %sshr_i16_imm(i16) -> i16 { @@ -988,8 +993,9 @@ block0(v0: i8, v1: i8): ; block0: ; lbr %r2, %r2 -; nill %r3, 7 -; srak %r2, %r2, 0(%r3) +; lgr %r4, %r3 +; nill %r4, 7 +; srak %r2, %r2, 0(%r4) ; br %r14 function %sshr_i8_imm(i8) -> i8 { diff --git a/cranelift/filetests/filetests/isa/x64/amode-opt.clif b/cranelift/filetests/filetests/isa/x64/amode-opt.clif index 7c7b315955..72bb63d321 100644 --- a/cranelift/filetests/filetests/isa/x64/amode-opt.clif +++ b/cranelift/filetests/filetests/isa/x64/amode-opt.clif @@ 
-153,8 +153,9 @@ block0(v0: i64, v1: i32, v2: i32): ; pushq %rbp ; movq %rsp, %rbp ; block0: -; addl %esi, %edx, %esi -; movq -1(%rdi,%rsi,4), %rax +; movq %rsi, %r9 +; addl %r9d, %edx, %r9d +; movq -1(%rdi,%r9,4), %rax ; movq %rbp, %rsp ; popq %rbp ; ret diff --git a/cranelift/filetests/filetests/isa/x64/b1.clif b/cranelift/filetests/filetests/isa/x64/b1.clif index d790d72d94..65cf14c407 100644 --- a/cranelift/filetests/filetests/isa/x64/b1.clif +++ b/cranelift/filetests/filetests/isa/x64/b1.clif @@ -11,8 +11,8 @@ block0(v0: b1, v1: i32, v2: i32): ; movq %rsp, %rbp ; block0: ; testb $1, %dil -; cmovnzl %esi, %edx, %edx ; movq %rdx, %rax +; cmovnzl %esi, %eax, %eax ; movq %rbp, %rsp ; popq %rbp ; ret @@ -147,8 +147,8 @@ block0(v0: i64): ; pushq %rbp ; movq %rsp, %rbp ; block0: -; shrq $63, %rdi, %rdi ; movq %rdi, %rax +; shrq $63, %rax, %rax ; movq %rbp, %rsp ; popq %rbp ; ret @@ -163,8 +163,8 @@ block0(v0: i32): ; pushq %rbp ; movq %rsp, %rbp ; block0: -; shrl $31, %edi, %edi ; movq %rdi, %rax +; shrl $31, %eax, %eax ; movq %rbp, %rsp ; popq %rbp ; ret @@ -179,8 +179,8 @@ block0(v0: i64): ; pushq %rbp ; movq %rsp, %rbp ; block0: -; shrq $63, %rdi, %rdi ; movq %rdi, %rax +; shrq $63, %rax, %rax ; movq %rbp, %rsp ; popq %rbp ; ret @@ -195,8 +195,8 @@ block0(v0: i32): ; pushq %rbp ; movq %rsp, %rbp ; block0: -; shrl $31, %edi, %edi ; movq %rdi, %rax +; shrl $31, %eax, %eax ; movq %rbp, %rsp ; popq %rbp ; ret @@ -211,9 +211,9 @@ block0(v0: i64): ; pushq %rbp ; movq %rsp, %rbp ; block0: -; notq %rdi, %rdi -; shrq $63, %rdi, %rdi ; movq %rdi, %rax +; notq %rax, %rax +; shrq $63, %rax, %rax ; movq %rbp, %rsp ; popq %rbp ; ret @@ -228,9 +228,9 @@ block0(v0: i32): ; pushq %rbp ; movq %rsp, %rbp ; block0: -; notq %rdi, %rdi -; shrl $31, %edi, %edi ; movq %rdi, %rax +; notq %rax, %rax +; shrl $31, %eax, %eax ; movq %rbp, %rsp ; popq %rbp ; ret @@ -245,9 +245,9 @@ block0(v0: i64): ; pushq %rbp ; movq %rsp, %rbp ; block0: -; notq %rdi, %rdi -; shrq $63, %rdi, %rdi ; movq %rdi, 
%rax +; notq %rax, %rax +; shrq $63, %rax, %rax ; movq %rbp, %rsp ; popq %rbp ; ret @@ -262,9 +262,9 @@ block0(v0: i32): ; pushq %rbp ; movq %rsp, %rbp ; block0: -; notq %rdi, %rdi -; shrl $31, %edi, %edi ; movq %rdi, %rax +; notq %rax, %rax +; shrl $31, %eax, %eax ; movq %rbp, %rsp ; popq %rbp ; ret diff --git a/cranelift/filetests/filetests/isa/x64/basic.clif b/cranelift/filetests/filetests/isa/x64/basic.clif index ba779d4cf6..ad20bcc4f0 100644 --- a/cranelift/filetests/filetests/isa/x64/basic.clif +++ b/cranelift/filetests/filetests/isa/x64/basic.clif @@ -10,8 +10,8 @@ block0(v0: i32, v1: i32): ; pushq %rbp ; movq %rsp, %rbp ; block0: -; addl %edi, %esi, %edi ; movq %rdi, %rax +; addl %eax, %esi, %eax ; movq %rbp, %rsp ; popq %rbp ; ret diff --git a/cranelift/filetests/filetests/isa/x64/call-conv.clif b/cranelift/filetests/filetests/isa/x64/call-conv.clif index 6ceb2225e6..6586bacce9 100644 --- a/cranelift/filetests/filetests/isa/x64/call-conv.clif +++ b/cranelift/filetests/filetests/isa/x64/call-conv.clif @@ -12,11 +12,10 @@ block0(v0: i32): ; pushq %rbp ; movq %rsp, %rbp ; block0: +; movq %rdi, %rcx ; subq %rsp, $32, %rsp ; virtual_sp_offset_adjust 32 -; movq %rdi, %rcx -; movq %rcx, %rdi -; call *%rdi +; call *%rcx ; addq %rsp, $32, %rsp ; virtual_sp_offset_adjust -32 ; movq %rbp, %rsp @@ -36,20 +35,16 @@ block0(v0: i32, v1: f32): ; pushq %rbp ; movq %rsp, %rbp ; block0: -; movq %rdi, %r10 ; movdqa %xmm0, %xmm6 ; subq %rsp, $32, %rsp ; virtual_sp_offset_adjust 32 -; movq %r10, %rcx +; movq %rdi, %rcx ; movdqa %xmm6, %xmm1 -; movq %r10, %rdi -; movdqa %xmm1, %xmm6 ; call *%rdi ; addq %rsp, $32, %rsp ; virtual_sp_offset_adjust -32 -; movq %rdi, %r10 ; movdqa %xmm6, %xmm0 -; call *%r10 +; call *%rdi ; movq %rbp, %rsp ; popq %rbp ; ret @@ -130,22 +125,19 @@ block0( ; pushq %rbp ; movq %rsp, %rbp ; block0: -; movq %rdx, %r10 +; movq %rcx, %rax +; movq %rdx, %rcx ; movq %rsi, %rdx -; movq %r8, %rsi -; movq %r10, %r8 -; movq %r9, %rax -; movq %rcx, %r9 +; movq %rdi, 
%rsi +; movq %rax, %rdi ; movq 16(%rbp), %r11 ; movq 24(%rbp), %r10 ; movss 32(%rbp), %xmm9 ; movsd 40(%rbp), %xmm8 ; subq %rsp, $144, %rsp ; virtual_sp_offset_adjust 144 -; movq %rdi, %rcx -; movq %rsi, 32(%rsp) -; movq %rax, %rsi -; movq %rsi, 40(%rsp) +; movq %r8, 32(%rsp) +; movq %r9, 40(%rsp) ; movsd %xmm0, 48(%rsp) ; movsd %xmm1, 56(%rsp) ; movsd %xmm2, 64(%rsp) @@ -158,7 +150,10 @@ block0( ; movl %r10d, 120(%rsp) ; movss %xmm9, 128(%rsp) ; movsd %xmm8, 136(%rsp) -; call *%rdi +; movq %rdi, %r9 +; movq %rcx, %r8 +; movq %rsi, %rcx +; call *%rcx ; addq %rsp, $144, %rsp ; virtual_sp_offset_adjust -144 ; movq %rbp, %rsp @@ -180,16 +175,15 @@ block0(v0: i64, v1:i64, v2:i64, v3:i64, v4:i64): ; pushq %rbp ; movq %rsp, %rbp ; block0: -; movq %r8, %r10 -; movq %rdx, %r8 -; movq %rcx, %rax +; movq %rdx, %r11 +; movq %rcx, %r9 +; movq %rsi, %rdx +; movq %rdi, %rcx ; subq %rsp, $48, %rsp ; virtual_sp_offset_adjust 48 -; movq %rdi, %rcx -; movq %rsi, %rdx -; movq %rax, %r9 -; movq %r10, 32(%rsp) -; call *%rdi +; movq %r8, 32(%rsp) +; movq %r11, %r8 +; call *%rcx ; addq %rsp, $48, %rsp ; virtual_sp_offset_adjust -48 ; movq %rbp, %rsp @@ -206,24 +200,23 @@ block0(v0: i32, v1: f32, v2: i64, v3: f64, v4: i32, v5: i32, v6: i32, v7: f32, v ; pushq %rbp ; movq %rsp, %rbp ; block0: -; movq %rcx, %r10 -; movq %r8, %r9 -; movdqa %xmm1, %xmm6 -; movdqa %xmm3, %xmm8 +; movq %rsi, %r9 +; movq %rdi, %rsi +; movdqa %xmm1, %xmm12 +; movdqa %xmm0, %xmm1 ; subq %rsp, $96, %rsp ; virtual_sp_offset_adjust 96 -; movq %rdi, %rcx -; movdqa %xmm0, %xmm1 -; movq %rsi, %r8 -; movdqa %xmm6, %xmm3 ; movl %edx, 32(%rsp) -; movl %r10d, 40(%rsp) -; movl %r9d, 48(%rsp) +; movl %ecx, 40(%rsp) +; movl %r8d, 48(%rsp) ; movss %xmm2, 56(%rsp) -; movsd %xmm8, 64(%rsp) +; movsd %xmm3, 64(%rsp) ; movss %xmm4, 72(%rsp) ; movsd %xmm5, 80(%rsp) -; call *%rdi +; movq %rsi, %rcx +; movq %r9, %r8 +; movdqa %xmm12, %xmm3 +; call *%rcx ; addq %rsp, $96, %rsp ; virtual_sp_offset_adjust -96 ; movq %rbp, %rsp @@ -240,8 
+233,7 @@ block0(v0: i32): ; pushq %rbp ; movq %rsp, %rbp ; block0: -; movq %rdi, %rcx -; call *%rcx +; call *%rdi ; movq %rbp, %rsp ; popq %rbp ; ret @@ -256,8 +248,7 @@ block0(v0: i32): ; pushq %rbp ; movq %rsp, %rbp ; block0: -; movq %rdi, %rcx -; call *%rcx +; call *%rdi ; movq %rbp, %rsp ; popq %rbp ; ret @@ -301,8 +292,9 @@ block0: ; movq %rdi, %r13 ; movl $1, %edx ; call *%rdx +; movq %rdx, %r9 ; movq %r13, %rdi -; movl %edx, 0(%rdi) +; movl %r9d, 0(%rdi) ; movq 0(%rsp), %r13 ; addq %rsp, $16, %rsp ; movq %rbp, %rsp @@ -328,12 +320,13 @@ block0: ; virtual_sp_offset_adjust 16 ; lea 0(%rsp), %rdi ; call *%r8 -; movq 0(%rsp), %r11 +; movq %rdx, %rcx +; movq 0(%rsp), %rdx ; addq %rsp, $16, %rsp ; virtual_sp_offset_adjust -16 ; movq %rbx, %rdi -; movq %rdx, 0(%rdi) -; movl %r11d, 8(%rdi) +; movq %rcx, 0(%rdi) +; movl %edx, 8(%rdi) ; movq 0(%rsp), %rbx ; addq %rsp, $16, %rsp ; movq %rbp, %rsp @@ -356,10 +349,13 @@ block0: ; movq %rdi, %r12 ; movl $1, %r9d ; call *%r9 +; movq %rax, %r9 +; movq %rdx, %r11 +; movdqa %xmm1, %xmm10 ; movq %r12, %rdi -; movq %rax, 0(%rdi) -; movl %edx, 8(%rdi) -; movss %xmm1, 12(%rdi) +; movq %r9, 0(%rdi) +; movl %r11d, 8(%rdi) +; movss %xmm10, 12(%rdi) ; movq 0(%rsp), %r12 ; addq %rsp, $16, %rsp ; movq %rbp, %rsp @@ -377,16 +373,19 @@ block0(v0: f32, v1: i64, v2: i32, v3: f32): ; pushq %rbp ; movq %rsp, %rbp ; subq %rsp, $16, %rsp -; movq %rbx, 0(%rsp) +; movq %r13, 0(%rsp) ; block0: -; movq %rdx, %rbx +; movq %rdx, %r13 ; movl $1, %eax ; call *%rax -; movq %rbx, %rcx -; movq %rax, 0(%rcx) -; movl %edx, 8(%rcx) -; movss %xmm1, 12(%rcx) -; movq 0(%rsp), %rbx +; movq %rax, %rdi +; movq %rdx, %rcx +; movdqa %xmm1, %xmm14 +; movq %r13, %rdx +; movq %rdi, 0(%rdx) +; movl %ecx, 8(%rdx) +; movss %xmm14, 12(%rdx) +; movq 0(%rsp), %r13 ; addq %rsp, $16, %rsp ; movq %rbp, %rsp ; popq %rbp diff --git a/cranelift/filetests/filetests/isa/x64/ceil-libcall.clif b/cranelift/filetests/filetests/isa/x64/ceil-libcall.clif index f24acddcd4..2041ba21a9 
100644 --- a/cranelift/filetests/filetests/isa/x64/ceil-libcall.clif +++ b/cranelift/filetests/filetests/isa/x64/ceil-libcall.clif @@ -10,8 +10,8 @@ block0(v0: f32): ; pushq %rbp ; movq %rsp, %rbp ; block0: -; load_ext_name %CeilF32+0, %r8 -; call *%r8 +; load_ext_name %CeilF32+0, %rdx +; call *%rdx ; movq %rbp, %rsp ; popq %rbp ; ret @@ -25,8 +25,8 @@ block0(v0: f64): ; pushq %rbp ; movq %rsp, %rbp ; block0: -; load_ext_name %CeilF64+0, %r8 -; call *%r8 +; load_ext_name %CeilF64+0, %rdx +; call *%rdx ; movq %rbp, %rsp ; popq %rbp ; ret diff --git a/cranelift/filetests/filetests/isa/x64/cmp-mem-bug.clif b/cranelift/filetests/filetests/isa/x64/cmp-mem-bug.clif index e07ea5ba36..0553a26d0a 100644 --- a/cranelift/filetests/filetests/isa/x64/cmp-mem-bug.clif +++ b/cranelift/filetests/filetests/isa/x64/cmp-mem-bug.clif @@ -18,8 +18,8 @@ block0(v0: i64, v1: i64): ; setz %al ; andq %rax, $1, %rax ; cmpq %r11, %rdi -; cmovzq %rdi, %rsi, %rsi ; movq %rsi, %rdx +; cmovzq %rdi, %rdx, %rdx ; movq %rbp, %rsp ; popq %rbp ; ret diff --git a/cranelift/filetests/filetests/isa/x64/div-checks.clif b/cranelift/filetests/filetests/isa/x64/div-checks.clif index af717f8e6c..573e9794ff 100644 --- a/cranelift/filetests/filetests/isa/x64/div-checks.clif +++ b/cranelift/filetests/filetests/isa/x64/div-checks.clif @@ -17,9 +17,8 @@ block0(v0: i8, v1: i8): ; pushq %rbp ; movq %rsp, %rbp ; block0: -; xorl %r10d, %r10d, %r10d ; movq %rdi, %rax -; movq %r10, %rdx +; xorl %edx, %edx, %edx ; srem_seq %al, %dl, %sil, %al, %dl, tmp=(none) ; shrq $8, %rax, %rax ; movq %rbp, %rsp @@ -36,9 +35,8 @@ block0(v0: i16, v1: i16): ; pushq %rbp ; movq %rsp, %rbp ; block0: -; xorl %r10d, %r10d, %r10d ; movq %rdi, %rax -; movq %r10, %rdx +; xorl %edx, %edx, %edx ; srem_seq %ax, %dx, %si, %ax, %dx, tmp=(none) ; movq %rdx, %rax ; movq %rbp, %rsp @@ -55,9 +53,8 @@ block0(v0: i32, v1: i32): ; pushq %rbp ; movq %rsp, %rbp ; block0: -; xorl %r10d, %r10d, %r10d ; movq %rdi, %rax -; movq %r10, %rdx +; xorl %edx, %edx, 
%edx ; srem_seq %eax, %edx, %esi, %eax, %edx, tmp=(none) ; movq %rdx, %rax ; movq %rbp, %rsp @@ -74,9 +71,8 @@ block0(v0: i64, v1: i64): ; pushq %rbp ; movq %rsp, %rbp ; block0: -; xorl %r10d, %r10d, %r10d ; movq %rdi, %rax -; movq %r10, %rdx +; xorl %edx, %edx, %edx ; srem_seq %rax, %rdx, %rsi, %rax, %rdx, tmp=(none) ; movq %rdx, %rax ; movq %rbp, %rsp diff --git a/cranelift/filetests/filetests/isa/x64/fastcall.clif b/cranelift/filetests/filetests/isa/x64/fastcall.clif index addc1118c3..efe8a5f2c6 100644 --- a/cranelift/filetests/filetests/isa/x64/fastcall.clif +++ b/cranelift/filetests/filetests/isa/x64/fastcall.clif @@ -149,20 +149,25 @@ block0(v0: i64): ; pushq %rbp ; unwind PushFrameRegs { offset_upward_to_caller_sp: 16 } ; movq %rsp, %rbp -; unwind DefineNewFrame { offset_upward_to_caller_sp: 16, offset_downward_to_clobbers: 0 } +; unwind DefineNewFrame { offset_upward_to_caller_sp: 16, offset_downward_to_clobbers: 16 } +; subq %rsp, $16, %rsp +; movq %rsi, 0(%rsp) +; unwind SaveReg { clobber_offset: 0, reg: p6i } ; block0: -; cvtsi2sd %rcx, %xmm2 +; cvtsi2sd %rcx, %xmm3 ; subq %rsp, $48, %rsp ; virtual_sp_offset_adjust 48 +; movq %rcx, 32(%rsp) +; movq %rcx, 40(%rsp) ; movq %rcx, %rdx -; movq %rdx, %r8 -; movdqa %xmm2, %xmm3 -; movq %r8, 32(%rsp) -; movq %r8, 40(%rsp) -; load_ext_name %g+0, %r8 -; call *%r8 +; load_ext_name %g+0, %rsi +; movq %rdx, %rcx +; movdqa %xmm3, %xmm2 +; call *%rsi ; addq %rsp, $48, %rsp ; virtual_sp_offset_adjust -48 +; movq 0(%rsp), %rsi +; addq %rsp, $16, %rsp ; movq %rbp, %rsp ; popq %rbp ; ret diff --git a/cranelift/filetests/filetests/isa/x64/fcvt.clif b/cranelift/filetests/filetests/isa/x64/fcvt.clif index 5f5e8b8488..88725b01da 100644 --- a/cranelift/filetests/filetests/isa/x64/fcvt.clif +++ b/cranelift/filetests/filetests/isa/x64/fcvt.clif @@ -190,10 +190,11 @@ block0(v0: i32x4): ; movdqa %xmm0, %xmm4 ; pslld %xmm4, $16, %xmm4 ; psrld %xmm4, $16, %xmm4 -; psubd %xmm0, %xmm4, %xmm0 +; movdqa %xmm0, %xmm10 +; psubd %xmm10, 
%xmm4, %xmm10 ; cvtdq2ps %xmm4, %xmm9 -; psrld %xmm0, $1, %xmm0 -; cvtdq2ps %xmm0, %xmm0 +; psrld %xmm10, $1, %xmm10 +; cvtdq2ps %xmm10, %xmm0 ; addps %xmm0, %xmm0, %xmm0 ; addps %xmm0, %xmm9, %xmm0 ; movq %rbp, %rsp @@ -434,14 +435,15 @@ block0(v0: f32x4): ; movq %rsp, %rbp ; block0: ; pxor %xmm3, %xmm3, %xmm3 -; maxps %xmm0, %xmm3, %xmm0 +; movdqa %xmm0, %xmm10 +; maxps %xmm10, %xmm3, %xmm10 ; pcmpeqd %xmm8, %xmm8, %xmm8 ; psrld %xmm8, $1, %xmm8 ; cvtdq2ps %xmm8, %xmm14 -; cvttps2dq %xmm0, %xmm13 -; subps %xmm0, %xmm14, %xmm0 -; cmpps $2, %xmm14, %xmm0, %xmm14 -; cvttps2dq %xmm0, %xmm0 +; cvttps2dq %xmm10, %xmm13 +; subps %xmm10, %xmm14, %xmm10 +; cmpps $2, %xmm14, %xmm10, %xmm14 +; cvttps2dq %xmm10, %xmm0 ; pxor %xmm0, %xmm14, %xmm0 ; pxor %xmm7, %xmm7, %xmm7 ; pmaxsd %xmm0, %xmm7, %xmm0 @@ -461,9 +463,10 @@ block0(v0: f32x4): ; block0: ; movdqa %xmm0, %xmm5 ; cmpps $0, %xmm5, %xmm0, %xmm5 -; andps %xmm0, %xmm5, %xmm0 -; pxor %xmm5, %xmm0, %xmm5 -; cvttps2dq %xmm0, %xmm9 +; movdqa %xmm0, %xmm6 +; andps %xmm6, %xmm5, %xmm6 +; pxor %xmm5, %xmm6, %xmm5 +; cvttps2dq %xmm6, %xmm9 ; movdqa %xmm9, %xmm0 ; pand %xmm0, %xmm5, %xmm0 ; psrad %xmm0, $31, %xmm0 diff --git a/cranelift/filetests/filetests/isa/x64/floor-libcall.clif b/cranelift/filetests/filetests/isa/x64/floor-libcall.clif index f965c6687c..745426d716 100644 --- a/cranelift/filetests/filetests/isa/x64/floor-libcall.clif +++ b/cranelift/filetests/filetests/isa/x64/floor-libcall.clif @@ -10,8 +10,8 @@ block0(v0: f32): ; pushq %rbp ; movq %rsp, %rbp ; block0: -; load_ext_name %FloorF32+0, %r8 -; call *%r8 +; load_ext_name %FloorF32+0, %rdx +; call *%rdx ; movq %rbp, %rsp ; popq %rbp ; ret @@ -25,8 +25,8 @@ block0(v0: f64): ; pushq %rbp ; movq %rsp, %rbp ; block0: -; load_ext_name %FloorF64+0, %r8 -; call *%r8 +; load_ext_name %FloorF64+0, %rdx +; call *%rdx ; movq %rbp, %rsp ; popq %rbp ; ret diff --git a/cranelift/filetests/filetests/isa/x64/fma-call.clif b/cranelift/filetests/filetests/isa/x64/fma-call.clif 
index ba57d01344..9bc580f656 100644 --- a/cranelift/filetests/filetests/isa/x64/fma-call.clif +++ b/cranelift/filetests/filetests/isa/x64/fma-call.clif @@ -10,8 +10,8 @@ block0(v0: f32, v1: f32, v2: f32): ; pushq %rbp ; movq %rsp, %rbp ; block0: -; load_ext_name %FmaF32+0, %rsi -; call *%rsi +; load_ext_name %FmaF32+0, %r9 +; call *%r9 ; movq %rbp, %rsp ; popq %rbp ; ret @@ -25,8 +25,8 @@ block0(v0: f64, v1: f64, v2: f64): ; pushq %rbp ; movq %rsp, %rbp ; block0: -; load_ext_name %FmaF64+0, %rsi -; call *%rsi +; load_ext_name %FmaF64+0, %r9 +; call *%r9 ; movq %rbp, %rsp ; popq %rbp ; ret diff --git a/cranelift/filetests/filetests/isa/x64/i128.clif b/cranelift/filetests/filetests/isa/x64/i128.clif index df5a7f597c..504cb1cc40 100644 --- a/cranelift/filetests/filetests/isa/x64/i128.clif +++ b/cranelift/filetests/filetests/isa/x64/i128.clif @@ -11,10 +11,10 @@ block0(v0: i128, v1: i128): ; pushq %rbp ; movq %rsp, %rbp ; block0: -; addq %rdi, %rdx, %rdi -; adcq %rsi, %rcx, %rsi ; movq %rdi, %rax +; addq %rax, %rdx, %rax ; movq %rsi, %rdx +; adcq %rdx, %rcx, %rdx ; movq %rbp, %rsp ; popq %rbp ; ret @@ -28,10 +28,10 @@ block0(v0: i128, v1: i128): ; pushq %rbp ; movq %rsp, %rbp ; block0: -; subq %rdi, %rdx, %rdi -; sbbq %rsi, %rcx, %rsi ; movq %rdi, %rax +; subq %rax, %rdx, %rax ; movq %rsi, %rdx +; sbbq %rdx, %rcx, %rdx ; movq %rbp, %rsp ; popq %rbp ; ret @@ -45,10 +45,10 @@ block0(v0: i128, v1: i128): ; pushq %rbp ; movq %rsp, %rbp ; block0: -; andq %rdi, %rdx, %rdi -; andq %rsi, %rcx, %rsi ; movq %rdi, %rax +; andq %rax, %rdx, %rax ; movq %rsi, %rdx +; andq %rdx, %rcx, %rdx ; movq %rbp, %rsp ; popq %rbp ; ret @@ -62,10 +62,10 @@ block0(v0: i128, v1: i128): ; pushq %rbp ; movq %rsp, %rbp ; block0: -; orq %rdi, %rdx, %rdi -; orq %rsi, %rcx, %rsi ; movq %rdi, %rax +; orq %rax, %rdx, %rax ; movq %rsi, %rdx +; orq %rdx, %rcx, %rdx ; movq %rbp, %rsp ; popq %rbp ; ret @@ -79,10 +79,10 @@ block0(v0: i128, v1: i128): ; pushq %rbp ; movq %rsp, %rbp ; block0: -; xorq %rdi, %rdx, 
%rdi -; xorq %rsi, %rcx, %rsi ; movq %rdi, %rax +; xorq %rax, %rdx, %rax ; movq %rsi, %rdx +; xorq %rdx, %rcx, %rdx ; movq %rbp, %rsp ; popq %rbp ; ret @@ -96,10 +96,10 @@ block0(v0: i128): ; pushq %rbp ; movq %rsp, %rbp ; block0: -; notq %rdi, %rdi -; notq %rsi, %rsi ; movq %rdi, %rax +; notq %rax, %rax ; movq %rsi, %rdx +; notq %rdx, %rdx ; movq %rbp, %rsp ; popq %rbp ; ret @@ -115,11 +115,11 @@ block0(v0: i128, v1: i128): ; block0: ; movq %rdi, %r8 ; imulq %r8, %rcx, %r8 +; movq %rdi, %rax ; imulq %rsi, %rdx, %rsi ; movq %r8, %r9 ; addq %r9, %rsi, %r9 ; movq %r9, %r8 -; movq %rdi, %rax ; mul %rax, %rdx, %rax, %rdx ; movq %r8, %rdi ; addq %rdi, %rdx, %rdi @@ -466,9 +466,9 @@ block0(v0: b1): ; pushq %rbp ; movq %rsp, %rbp ; block0: -; andq %rdi, $1, %rdi -; xorq %rdx, %rdx, %rdx ; movq %rdi, %rax +; andq %rax, $1, %rax +; xorq %rdx, %rdx, %rdx ; movq %rbp, %rsp ; popq %rbp ; ret @@ -486,16 +486,17 @@ block0(v0: i128): ; shrq $1, %rdx, %rdx ; movabsq $8608480567731124087, %r10 ; andq %rdx, %r10, %rdx -; subq %rdi, %rdx, %rdi +; movq %rdi, %r11 +; subq %r11, %rdx, %r11 ; shrq $1, %rdx, %rdx ; andq %rdx, %r10, %rdx -; subq %rdi, %rdx, %rdi +; subq %r11, %rdx, %r11 ; shrq $1, %rdx, %rdx ; andq %rdx, %r10, %rdx -; subq %rdi, %rdx, %rdi -; movq %rdi, %rax +; subq %r11, %rdx, %r11 +; movq %r11, %rax ; shrq $4, %rax, %rax -; addq %rax, %rdi, %rax +; addq %rax, %r11, %rax ; movabsq $1085102592571150095, %rcx ; andq %rax, %rcx, %rax ; movabsq $72340172838076673, %r9 @@ -505,16 +506,17 @@ block0(v0: i128): ; shrq $1, %rcx, %rcx ; movabsq $8608480567731124087, %r8 ; andq %rcx, %r8, %rcx -; subq %rsi, %rcx, %rsi +; movq %rsi, %r9 +; subq %r9, %rcx, %r9 ; shrq $1, %rcx, %rcx ; andq %rcx, %r8, %rcx -; subq %rsi, %rcx, %rsi +; subq %r9, %rcx, %r9 ; shrq $1, %rcx, %rcx ; andq %rcx, %r8, %rcx -; subq %rsi, %rcx, %rsi -; movq %rsi, %rcx +; subq %r9, %rcx, %r9 +; movq %r9, %rcx ; shrq $4, %rcx, %rcx -; addq %rcx, %rsi, %rcx +; addq %rcx, %r9, %rcx ; movabsq $1085102592571150095, %rsi 
; andq %rcx, %rsi, %rcx ; movabsq $72340172838076673, %rdx @@ -538,10 +540,11 @@ block0(v0: i128): ; movabsq $6148914691236517205, %r8 ; movq %rsi, %r9 ; andq %r9, %r8, %r9 -; shrq $1, %rsi, %rsi -; andq %rsi, %r8, %rsi +; movq %rsi, %rax +; shrq $1, %rax, %rax +; andq %rax, %r8, %rax ; shlq $1, %r9, %r9 -; orq %r9, %rsi, %r9 +; orq %r9, %rax, %r9 ; movabsq $3689348814741910323, %r11 ; movq %r9, %rsi ; andq %rsi, %r11, %rsi @@ -579,10 +582,11 @@ block0(v0: i128): ; movabsq $6148914691236517205, %rcx ; movq %rdi, %rdx ; andq %rdx, %rcx, %rdx -; shrq $1, %rdi, %rdi -; andq %rdi, %rcx, %rdi +; movq %rdi, %r11 +; shrq $1, %r11, %r11 +; andq %r11, %rcx, %r11 ; shlq $1, %rdx, %rdx -; orq %rdx, %rdi, %rdx +; orq %rdx, %r11, %rdx ; movabsq $3689348814741910323, %r9 ; movq %rdx, %r10 ; andq %r10, %r9, %r10 @@ -708,32 +712,42 @@ block0(v0: i128, v1: i128, v2: i64, v3: i128, v4: i128, v5: i128): ; pushq %rbp ; movq %rsp, %rbp -; subq %rsp, $16, %rsp +; subq %rsp, $32, %rsp ; movq %rbx, 0(%rsp) -; movq %r13, 8(%rsp) +; movq %r12, 8(%rsp) +; movq %r13, 16(%rsp) ; block0: +; movq %r9, %r13 +; movq %rcx, %rax +; movq %r8, %rcx +; movq %rax, %r8 ; movq 16(%rbp), %rbx ; movq 24(%rbp), %rax -; movq 32(%rbp), %r10 -; movq %r10, %r13 -; movq 40(%rbp), %r11 -; movq 48(%rbp), %r10 -; addq %rdi, %rdx, %rdi -; movq %rcx, %rdx -; adcq %rsi, %rdx, %rsi +; movq 32(%rbp), %r9 +; movq %r9, %r12 +; movq 40(%rbp), %r10 +; movq 48(%rbp), %r11 +; movq %rdi, %r9 +; addq %r9, %rdx, %r9 +; movq %r8, %rdi +; movq %rsi, %r8 +; adcq %r8, %rdi, %r8 ; xorq %rdx, %rdx, %rdx -; addq %r9, %r8, %r9 +; movq %rcx, %rsi +; movq %r13, %rdi +; addq %rdi, %rsi, %rdi ; adcq %rbx, %rdx, %rbx -; addq %rax, %r11, %rax -; movq %r13, %rdx -; adcq %rdx, %r10, %rdx -; addq %rdi, %r9, %rdi -; adcq %rsi, %rbx, %rsi -; addq %rax, %rdi, %rax -; adcq %rdx, %rsi, %rdx +; addq %rax, %r10, %rax +; movq %r12, %rdx +; adcq %rdx, %r11, %rdx +; addq %r9, %rdi, %r9 +; adcq %r8, %rbx, %r8 +; addq %rax, %r9, %rax +; adcq %rdx, %r8, %rdx 
; movq 0(%rsp), %rbx -; movq 8(%rsp), %r13 -; addq %rsp, $16, %rsp +; movq 8(%rsp), %r12 +; movq 16(%rsp), %r13 +; addq %rsp, $32, %rsp ; movq %rbp, %rsp ; popq %rbp ; ret @@ -746,37 +760,41 @@ block0(v0: i128): ; pushq %rbp ; movq %rsp, %rbp -; subq %rsp, $32, %rsp +; subq %rsp, $48, %rsp ; movq %rbx, 0(%rsp) ; movq %r12, 8(%rsp) -; movq %r14, 16(%rsp) -; movq %r15, 24(%rsp) +; movq %r13, 16(%rsp) +; movq %r14, 24(%rsp) +; movq %r15, 32(%rsp) ; block0: ; movq %rdx, %r12 ; movq %rdi, %rax ; movq %rsi, %rdx ; movq %rdi, %r14 ; movq %rsi, %rbx +; movq %rdi, %r13 +; movq %rsi, %r15 ; movq %rdi, %r11 -; movq %rsi, %r9 ; movq %rdi, %r10 -; movq %rdi, %r8 ; movq %rsi, %rcx -; movq %r12, %r15 -; movq %r14, 0(%r15) -; movq %rbx, 8(%r15) -; movq %r11, 16(%r15) -; movq %r9, 24(%r15) -; movq %r10, 32(%r15) -; movq %r8, 40(%r15) -; movq %rcx, 48(%r15) -; movq %rdi, 56(%r15) -; movq %rsi, 64(%r15) +; movq %rdi, %r8 +; movq %rsi, %r9 +; movq %r12, %rdi +; movq %r14, 0(%rdi) +; movq %rbx, 8(%rdi) +; movq %r13, 16(%rdi) +; movq %r15, 24(%rdi) +; movq %r11, 32(%rdi) +; movq %r10, 40(%rdi) +; movq %rcx, 48(%rdi) +; movq %r8, 56(%rdi) +; movq %r9, 64(%rdi) ; movq 0(%rsp), %rbx ; movq 8(%rsp), %r12 -; movq 16(%rsp), %r14 -; movq 24(%rsp), %r15 -; addq %rsp, $32, %rsp +; movq 16(%rsp), %r13 +; movq 24(%rsp), %r14 +; movq 32(%rsp), %r15 +; addq %rsp, $48, %rsp ; movq %rbp, %rsp ; popq %rbp ; ret @@ -873,8 +891,8 @@ block0(v0: i8, v1: i128): ; block0: ; movq %rsi, %rcx ; andq %rcx, $7, %rcx -; shlb %cl, %dil, %dil ; movq %rdi, %rax +; shlb %cl, %al, %al ; movq %rbp, %rsp ; popq %rbp ; ret @@ -891,19 +909,21 @@ block0(v0: i128, v1: i128): ; movq %rdx, %rcx ; movq %rdi, %rdx ; shlq %cl, %rdx, %rdx -; shlq %cl, %rsi, %rsi -; movq %rcx, %rax +; movq %rsi, %r8 +; shlq %cl, %r8, %r8 +; movq %rcx, %rsi ; movl $64, %ecx -; movq %rax, %r8 -; subq %rcx, %r8, %rcx -; shrq %cl, %rdi, %rdi +; movq %rsi, %r9 +; subq %rcx, %r9, %rcx +; movq %rdi, %rsi +; shrq %cl, %rsi, %rsi ; xorq %rax, %rax, %rax -; 
testq $127, %r8 -; cmovzq %rax, %rdi, %rdi -; orq %rdi, %rsi, %rdi -; testq $64, %r8 +; testq $127, %r9 +; cmovzq %rax, %rsi, %rsi +; orq %rsi, %r8, %rsi +; testq $64, %r9 ; cmovzq %rdx, %rax, %rax -; cmovzq %rdi, %rdx, %rdx +; cmovzq %rsi, %rdx, %rdx ; movq %rbp, %rsp ; popq %rbp ; ret @@ -918,21 +938,23 @@ block0(v0: i128, v1: i128): ; movq %rsp, %rbp ; block0: ; movq %rdx, %rcx -; shrq %cl, %rdi, %rdi +; movq %rdi, %r10 +; shrq %cl, %r10, %r10 ; movq %rsi, %r8 ; shrq %cl, %r8, %r8 ; movl $64, %ecx ; movq %rdx, %r9 ; subq %rcx, %r9, %rcx -; shlq %cl, %rsi, %rsi +; movq %rsi, %rdi +; shlq %cl, %rdi, %rdi ; xorq %r11, %r11, %r11 ; testq $127, %r9 -; cmovzq %r11, %rsi, %rsi -; orq %rsi, %rdi, %rsi +; cmovzq %r11, %rdi, %rdi +; orq %rdi, %r10, %rdi ; xorq %rdx, %rdx, %rdx ; testq $64, %r9 ; movq %r8, %rax -; cmovzq %rsi, %rax, %rax +; cmovzq %rdi, %rax, %rax ; cmovzq %r8, %rdx, %rdx ; movq %rbp, %rsp ; popq %rbp @@ -948,25 +970,25 @@ block0(v0: i128, v1: i128): ; movq %rsp, %rbp ; block0: ; movq %rdx, %rcx -; shrq %cl, %rdi, %rdi -; movq %rsi, %rdx -; sarq %cl, %rdx, %rdx -; movq %rcx, %rax +; movq %rdi, %r8 +; shrq %cl, %r8, %r8 +; movq %rsi, %rdi +; sarq %cl, %rdi, %rdi ; movl $64, %ecx -; movq %rax, %r8 -; subq %rcx, %r8, %rcx +; movq %rdx, %r9 +; subq %rcx, %r9, %rcx ; movq %rsi, %r11 ; shlq %cl, %r11, %r11 ; xorq %rax, %rax, %rax -; testq $127, %r8 +; testq $127, %r9 ; cmovzq %rax, %r11, %r11 -; orq %rdi, %r11, %rdi -; sarq $63, %rsi, %rsi -; testq $64, %r8 -; movq %rdx, %rax -; cmovzq %rdi, %rax, %rax -; cmovzq %rdx, %rsi, %rsi +; orq %r8, %r11, %r8 ; movq %rsi, %rdx +; sarq $63, %rdx, %rdx +; testq $64, %r9 +; movq %rdi, %rax +; cmovzq %r8, %rax, %rax +; cmovzq %rdi, %rdx, %rdx ; movq %rbp, %rsp ; popq %rbp ; ret @@ -1000,19 +1022,20 @@ block0(v0: i128, v1: i128): ; movl $128, %ecx ; movq %r10, %r8 ; subq %rcx, %r8, %rcx -; shrq %cl, %rdi, %rdi +; movq %rdi, %r9 +; shrq %cl, %r9, %r9 ; movq %rsi, %r11 ; shrq %cl, %r11, %r11 -; movq %rcx, %r8 +; movq %rcx, %rdi 
; movl $64, %ecx -; subq %rcx, %r8, %rcx +; subq %rcx, %rdi, %rcx ; shlq %cl, %rsi, %rsi ; xorq %r10, %r10, %r10 -; testq $127, %r8 +; testq $127, %rdi ; cmovzq %r10, %rsi, %rsi -; orq %rsi, %rdi, %rsi +; orq %rsi, %r9, %rsi ; xorq %r10, %r10, %r10 -; testq $64, %r8 +; testq $64, %rdi ; movq %r11, %rdi ; cmovzq %rsi, %rdi, %rdi ; cmovzq %r11, %r10, %r10 @@ -1057,17 +1080,17 @@ block0(v0: i128, v1: i128): ; subq %rcx, %r8, %rcx ; movq %rdi, %r11 ; shlq %cl, %r11, %r11 -; shlq %cl, %rsi, %rsi -; movq %rcx, %r8 +; movq %rsi, %r9 +; shlq %cl, %r9, %r9 +; movq %rcx, %rsi ; movl $64, %ecx -; movq %r8, %r9 -; subq %rcx, %r9, %rcx +; subq %rcx, %rsi, %rcx ; shrq %cl, %rdi, %rdi ; xorq %r8, %r8, %r8 -; testq $127, %r9 +; testq $127, %rsi ; cmovzq %r8, %rdi, %rdi -; orq %rdi, %rsi, %rdi -; testq $64, %r9 +; orq %rdi, %r9, %rdi +; testq $64, %rsi ; cmovzq %r11, %r8, %r8 ; cmovzq %rdi, %r11, %r11 ; orq %rax, %r8, %rax diff --git a/cranelift/filetests/filetests/isa/x64/ishl.clif b/cranelift/filetests/filetests/isa/x64/ishl.clif index 88532f8c5b..e171cf9774 100644 --- a/cranelift/filetests/filetests/isa/x64/ishl.clif +++ b/cranelift/filetests/filetests/isa/x64/ishl.clif @@ -20,19 +20,21 @@ block0(v0: i128, v1: i8): ; movzbq %dl, %rcx ; movq %rdi, %rdx ; shlq %cl, %rdx, %rdx -; shlq %cl, %rsi, %rsi +; movq %rsi, %r8 +; shlq %cl, %r8, %r8 ; movq %rcx, %r11 ; movl $64, %ecx -; movq %r11, %r8 -; subq %rcx, %r8, %rcx -; shrq %cl, %rdi, %rdi +; movq %r11, %r9 +; subq %rcx, %r9, %rcx +; movq %rdi, %rsi +; shrq %cl, %rsi, %rsi ; xorq %rax, %rax, %rax -; testq $127, %r8 -; cmovzq %rax, %rdi, %rdi -; orq %rdi, %rsi, %rdi -; testq $64, %r8 +; testq $127, %r9 +; cmovzq %rax, %rsi, %rsi +; orq %rsi, %r8, %rsi +; testq $64, %r9 ; cmovzq %rdx, %rax, %rax -; cmovzq %rdi, %rdx, %rdx +; cmovzq %rsi, %rdx, %rdx ; movq %rbp, %rsp ; popq %rbp ; ret @@ -54,14 +56,15 @@ block0(v0: i128, v1: i64): ; movl $64, %ecx ; movq %r11, %r8 ; subq %rcx, %r8, %rcx -; shrq %cl, %rdi, %rdi +; movq %rdi, %r11 +; 
shrq %cl, %r11, %r11 ; xorq %rax, %rax, %rax ; testq $127, %r8 -; cmovzq %rax, %rdi, %rdi -; orq %rdi, %rsi, %rdi +; cmovzq %rax, %r11, %r11 +; orq %r11, %rsi, %r11 ; testq $64, %r8 ; cmovzq %rdx, %rax, %rax -; cmovzq %rdi, %rdx, %rdx +; cmovzq %r11, %rdx, %rdx ; movq %rbp, %rsp ; popq %rbp ; ret @@ -83,14 +86,15 @@ block0(v0: i128, v1: i32): ; movl $64, %ecx ; movq %r11, %r8 ; subq %rcx, %r8, %rcx -; shrq %cl, %rdi, %rdi +; movq %rdi, %r11 +; shrq %cl, %r11, %r11 ; xorq %rax, %rax, %rax ; testq $127, %r8 -; cmovzq %rax, %rdi, %rdi -; orq %rdi, %rsi, %rdi +; cmovzq %rax, %r11, %r11 +; orq %r11, %rsi, %r11 ; testq $64, %r8 ; cmovzq %rdx, %rax, %rax -; cmovzq %rdi, %rdx, %rdx +; cmovzq %r11, %rdx, %rdx ; movq %rbp, %rsp ; popq %rbp ; ret @@ -112,14 +116,15 @@ block0(v0: i128, v1: i16): ; movl $64, %ecx ; movq %r11, %r8 ; subq %rcx, %r8, %rcx -; shrq %cl, %rdi, %rdi +; movq %rdi, %r11 +; shrq %cl, %r11, %r11 ; xorq %rax, %rax, %rax ; testq $127, %r8 -; cmovzq %rax, %rdi, %rdi -; orq %rdi, %rsi, %rdi +; cmovzq %rax, %r11, %r11 +; orq %r11, %rsi, %r11 ; testq $64, %r8 ; cmovzq %rdx, %rax, %rax -; cmovzq %rdi, %rdx, %rdx +; cmovzq %r11, %rdx, %rdx ; movq %rbp, %rsp ; popq %rbp ; ret @@ -141,14 +146,15 @@ block0(v0: i128, v1: i8): ; movl $64, %ecx ; movq %r11, %r8 ; subq %rcx, %r8, %rcx -; shrq %cl, %rdi, %rdi +; movq %rdi, %r11 +; shrq %cl, %r11, %r11 ; xorq %rax, %rax, %rax ; testq $127, %r8 -; cmovzq %rax, %rdi, %rdi -; orq %rdi, %rsi, %rdi +; cmovzq %rax, %r11, %r11 +; orq %r11, %rsi, %r11 ; testq $64, %r8 ; cmovzq %rdx, %rax, %rax -; cmovzq %rdi, %rdx, %rdx +; cmovzq %r11, %rdx, %rdx ; movq %rbp, %rsp ; popq %rbp ; ret @@ -163,8 +169,8 @@ block0(v0: i64, v1: i128): ; movq %rsp, %rbp ; block0: ; movq %rsi, %rcx -; shlq %cl, %rdi, %rdi ; movq %rdi, %rax +; shlq %cl, %rax, %rax ; movq %rbp, %rsp ; popq %rbp ; ret @@ -179,8 +185,8 @@ block0(v0: i32, v1: i128): ; movq %rsp, %rbp ; block0: ; movq %rsi, %rcx -; shll %cl, %edi, %edi ; movq %rdi, %rax +; shll %cl, %eax, %eax 
; movq %rbp, %rsp ; popq %rbp ; ret @@ -196,8 +202,8 @@ block0(v0: i16, v1: i128): ; block0: ; movq %rsi, %rcx ; andq %rcx, $15, %rcx -; shlw %cl, %di, %di ; movq %rdi, %rax +; shlw %cl, %ax, %ax ; movq %rbp, %rsp ; popq %rbp ; ret @@ -213,8 +219,8 @@ block0(v0: i8, v1: i128): ; block0: ; movq %rsi, %rcx ; andq %rcx, $7, %rcx -; shlb %cl, %dil, %dil ; movq %rdi, %rax +; shlb %cl, %al, %al ; movq %rbp, %rsp ; popq %rbp ; ret @@ -229,8 +235,8 @@ block0(v0: i64, v1: i64): ; movq %rsp, %rbp ; block0: ; movq %rsi, %rcx -; shlq %cl, %rdi, %rdi ; movq %rdi, %rax +; shlq %cl, %rax, %rax ; movq %rbp, %rsp ; popq %rbp ; ret @@ -245,8 +251,8 @@ block0(v0: i64, v1: i32): ; movq %rsp, %rbp ; block0: ; movq %rsi, %rcx -; shlq %cl, %rdi, %rdi ; movq %rdi, %rax +; shlq %cl, %rax, %rax ; movq %rbp, %rsp ; popq %rbp ; ret @@ -261,8 +267,8 @@ block0(v0: i64, v1: i16): ; movq %rsp, %rbp ; block0: ; movq %rsi, %rcx -; shlq %cl, %rdi, %rdi ; movq %rdi, %rax +; shlq %cl, %rax, %rax ; movq %rbp, %rsp ; popq %rbp ; ret @@ -277,8 +283,8 @@ block0(v0: i64, v1: i8): ; movq %rsp, %rbp ; block0: ; movq %rsi, %rcx -; shlq %cl, %rdi, %rdi ; movq %rdi, %rax +; shlq %cl, %rax, %rax ; movq %rbp, %rsp ; popq %rbp ; ret @@ -293,8 +299,8 @@ block0(v0: i32, v1: i64): ; movq %rsp, %rbp ; block0: ; movq %rsi, %rcx -; shll %cl, %edi, %edi ; movq %rdi, %rax +; shll %cl, %eax, %eax ; movq %rbp, %rsp ; popq %rbp ; ret @@ -309,8 +315,8 @@ block0(v0: i32, v1: i32): ; movq %rsp, %rbp ; block0: ; movq %rsi, %rcx -; shll %cl, %edi, %edi ; movq %rdi, %rax +; shll %cl, %eax, %eax ; movq %rbp, %rsp ; popq %rbp ; ret @@ -325,8 +331,8 @@ block0(v0: i32, v1: i16): ; movq %rsp, %rbp ; block0: ; movq %rsi, %rcx -; shll %cl, %edi, %edi ; movq %rdi, %rax +; shll %cl, %eax, %eax ; movq %rbp, %rsp ; popq %rbp ; ret @@ -341,8 +347,8 @@ block0(v0: i32, v1: i8): ; movq %rsp, %rbp ; block0: ; movq %rsi, %rcx -; shll %cl, %edi, %edi ; movq %rdi, %rax +; shll %cl, %eax, %eax ; movq %rbp, %rsp ; popq %rbp ; ret @@ -358,8 +364,8 @@ 
block0(v0: i16, v1: i64): ; block0: ; movq %rsi, %rcx ; andq %rcx, $15, %rcx -; shlw %cl, %di, %di ; movq %rdi, %rax +; shlw %cl, %ax, %ax ; movq %rbp, %rsp ; popq %rbp ; ret @@ -375,8 +381,8 @@ block0(v0: i16, v1: i32): ; block0: ; movq %rsi, %rcx ; andq %rcx, $15, %rcx -; shlw %cl, %di, %di ; movq %rdi, %rax +; shlw %cl, %ax, %ax ; movq %rbp, %rsp ; popq %rbp ; ret @@ -392,8 +398,8 @@ block0(v0: i16, v1: i16): ; block0: ; movq %rsi, %rcx ; andq %rcx, $15, %rcx -; shlw %cl, %di, %di ; movq %rdi, %rax +; shlw %cl, %ax, %ax ; movq %rbp, %rsp ; popq %rbp ; ret @@ -409,8 +415,8 @@ block0(v0: i16, v1: i8): ; block0: ; movq %rsi, %rcx ; andq %rcx, $15, %rcx -; shlw %cl, %di, %di ; movq %rdi, %rax +; shlw %cl, %ax, %ax ; movq %rbp, %rsp ; popq %rbp ; ret @@ -426,8 +432,8 @@ block0(v0: i8, v1: i64): ; block0: ; movq %rsi, %rcx ; andq %rcx, $7, %rcx -; shlb %cl, %dil, %dil ; movq %rdi, %rax +; shlb %cl, %al, %al ; movq %rbp, %rsp ; popq %rbp ; ret @@ -443,8 +449,8 @@ block0(v0: i8, v1: i32): ; block0: ; movq %rsi, %rcx ; andq %rcx, $7, %rcx -; shlb %cl, %dil, %dil ; movq %rdi, %rax +; shlb %cl, %al, %al ; movq %rbp, %rsp ; popq %rbp ; ret @@ -460,8 +466,8 @@ block0(v0: i8, v1: i16): ; block0: ; movq %rsi, %rcx ; andq %rcx, $7, %rcx -; shlb %cl, %dil, %dil ; movq %rdi, %rax +; shlb %cl, %al, %al ; movq %rbp, %rsp ; popq %rbp ; ret @@ -477,8 +483,8 @@ block0(v0: i8, v1: i8): ; block0: ; movq %rsi, %rcx ; andq %rcx, $7, %rcx -; shlb %cl, %dil, %dil ; movq %rdi, %rax +; shlb %cl, %al, %al ; movq %rbp, %rsp ; popq %rbp ; ret @@ -492,8 +498,8 @@ block0(v0: i64): ; pushq %rbp ; movq %rsp, %rbp ; block0: -; shlq $1, %rdi, %rdi ; movq %rdi, %rax +; shlq $1, %rax, %rax ; movq %rbp, %rsp ; popq %rbp ; ret @@ -507,8 +513,8 @@ block0(v0: i32): ; pushq %rbp ; movq %rsp, %rbp ; block0: -; shll $1, %edi, %edi ; movq %rdi, %rax +; shll $1, %eax, %eax ; movq %rbp, %rsp ; popq %rbp ; ret @@ -522,8 +528,8 @@ block0(v0: i16): ; pushq %rbp ; movq %rsp, %rbp ; block0: -; shlw $1, %di, %di ; movq 
%rdi, %rax +; shlw $1, %ax, %ax ; movq %rbp, %rsp ; popq %rbp ; ret @@ -537,8 +543,8 @@ block0(v0: i8): ; pushq %rbp ; movq %rsp, %rbp ; block0: -; shlb $1, %dil, %dil ; movq %rdi, %rax +; shlb $1, %al, %al ; movq %rbp, %rsp ; popq %rbp ; ret diff --git a/cranelift/filetests/filetests/isa/x64/load-op.clif b/cranelift/filetests/filetests/isa/x64/load-op.clif index a2b3e8a019..dff0e567b0 100644 --- a/cranelift/filetests/filetests/isa/x64/load-op.clif +++ b/cranelift/filetests/filetests/isa/x64/load-op.clif @@ -11,8 +11,8 @@ block0(v0: i64, v1: i32): ; pushq %rbp ; movq %rsp, %rbp ; block0: -; addl %esi, 0(%rdi), %esi ; movq %rsi, %rax +; addl %eax, 0(%rdi), %eax ; movq %rbp, %rsp ; popq %rbp ; ret @@ -27,8 +27,8 @@ block0(v0: i64, v1: i32): ; pushq %rbp ; movq %rsp, %rbp ; block0: -; addl %esi, 0(%rdi), %esi ; movq %rsi, %rax +; addl %eax, 0(%rdi), %eax ; movq %rbp, %rsp ; popq %rbp ; ret @@ -43,8 +43,8 @@ block0(v0: i64, v1: i64): ; pushq %rbp ; movq %rsp, %rbp ; block0: -; addq %rsi, 0(%rdi), %rsi ; movq %rsi, %rax +; addq %rax, 0(%rdi), %rax ; movq %rbp, %rsp ; popq %rbp ; ret @@ -56,14 +56,11 @@ block0(v0: i64, v1: i64): return v3 } -;; test narrow loads: 8-bit load should not merge because the `addl` is 32 bits -;; and would load 32 bits from memory, which may go beyond the end of the heap. 
- ; pushq %rbp ; movq %rsp, %rbp ; block0: -; addq %rsi, 0(%rdi), %rsi ; movq %rsi, %rax +; addq %rax, 0(%rdi), %rax ; movq %rbp, %rsp ; popq %rbp ; ret diff --git a/cranelift/filetests/filetests/isa/x64/narrowing.clif b/cranelift/filetests/filetests/isa/x64/narrowing.clif index e0c8b6f4fc..7ca0306967 100644 --- a/cranelift/filetests/filetests/isa/x64/narrowing.clif +++ b/cranelift/filetests/filetests/isa/x64/narrowing.clif @@ -44,8 +44,9 @@ block0(v0: f64x2): ; cmppd $0, %xmm5, %xmm0, %xmm5 ; movupd const(0), %xmm6 ; andps %xmm5, %xmm6, %xmm5 -; minpd %xmm0, %xmm5, %xmm0 -; cvttpd2dq %xmm0, %xmm0 +; movdqa %xmm0, %xmm9 +; minpd %xmm9, %xmm5, %xmm9 +; cvttpd2dq %xmm9, %xmm0 ; movq %rbp, %rsp ; popq %rbp ; ret diff --git a/cranelift/filetests/filetests/isa/x64/nearest-libcall.clif b/cranelift/filetests/filetests/isa/x64/nearest-libcall.clif index 8de2c110be..53df3468aa 100644 --- a/cranelift/filetests/filetests/isa/x64/nearest-libcall.clif +++ b/cranelift/filetests/filetests/isa/x64/nearest-libcall.clif @@ -10,8 +10,8 @@ block0(v0: f32): ; pushq %rbp ; movq %rsp, %rbp ; block0: -; load_ext_name %NearestF32+0, %r8 -; call *%r8 +; load_ext_name %NearestF32+0, %rdx +; call *%rdx ; movq %rbp, %rsp ; popq %rbp ; ret @@ -25,8 +25,8 @@ block0(v0: f64): ; pushq %rbp ; movq %rsp, %rbp ; block0: -; load_ext_name %NearestF64+0, %r8 -; call *%r8 +; load_ext_name %NearestF64+0, %rdx +; call *%rdx ; movq %rbp, %rsp ; popq %rbp ; ret diff --git a/cranelift/filetests/filetests/isa/x64/popcnt.clif b/cranelift/filetests/filetests/isa/x64/popcnt.clif index 4f0be7407d..4230200d15 100644 --- a/cranelift/filetests/filetests/isa/x64/popcnt.clif +++ b/cranelift/filetests/filetests/isa/x64/popcnt.clif @@ -14,16 +14,17 @@ block0(v0: i64): ; shrq $1, %rax, %rax ; movabsq $8608480567731124087, %r8 ; andq %rax, %r8, %rax -; subq %rdi, %rax, %rdi +; movq %rdi, %r9 +; subq %r9, %rax, %r9 ; shrq $1, %rax, %rax ; andq %rax, %r8, %rax -; subq %rdi, %rax, %rdi +; subq %r9, %rax, %r9 ; shrq $1, %rax, 
%rax ; andq %rax, %r8, %rax -; subq %rdi, %rax, %rdi -; movq %rdi, %rax +; subq %r9, %rax, %r9 +; movq %r9, %rax ; shrq $4, %rax, %rax -; addq %rax, %rdi, %rax +; addq %rax, %r9, %rax ; movabsq $1085102592571150095, %rsi ; andq %rax, %rsi, %rax ; movabsq $72340172838076673, %rdx @@ -80,16 +81,17 @@ block0(v0: i32): ; shrl $1, %eax, %eax ; movl $2004318071, %r8d ; andl %eax, %r8d, %eax -; subl %edi, %eax, %edi +; movq %rdi, %r9 +; subl %r9d, %eax, %r9d ; shrl $1, %eax, %eax ; andl %eax, %r8d, %eax -; subl %edi, %eax, %edi +; subl %r9d, %eax, %r9d ; shrl $1, %eax, %eax ; andl %eax, %r8d, %eax -; subl %edi, %eax, %edi -; movq %rdi, %rax +; subl %r9d, %eax, %r9d +; movq %r9, %rax ; shrl $4, %eax, %eax -; addl %eax, %edi, %eax +; addl %eax, %r9d, %eax ; andl %eax, $252645135, %eax ; imull %eax, $16843009, %eax ; shrl $24, %eax, %eax diff --git a/cranelift/filetests/filetests/isa/x64/sdiv.clif b/cranelift/filetests/filetests/isa/x64/sdiv.clif index 33c402a5a0..543d7b82a3 100644 --- a/cranelift/filetests/filetests/isa/x64/sdiv.clif +++ b/cranelift/filetests/filetests/isa/x64/sdiv.clif @@ -12,7 +12,6 @@ block0(v0: i8, v1: i8): ; block0: ; movq %rdi, %rax ; cbw %al, %al -; movq %rax, %r11 ; idiv %al, (none), %sil, %al, (none) ; movq %rbp, %rsp ; popq %rbp @@ -29,7 +28,6 @@ block0(v0: i16, v1: i16): ; block0: ; movq %rdi, %rax ; cwd %ax, %dx -; movq %rdx, %rcx ; idiv %ax, %dx, %si, %ax, %dx ; movq %rbp, %rsp ; popq %rbp @@ -46,7 +44,6 @@ block0(v0: i32, v1: i32): ; block0: ; movq %rdi, %rax ; cdq %eax, %edx -; movq %rdx, %rcx ; idiv %eax, %edx, %esi, %eax, %edx ; movq %rbp, %rsp ; popq %rbp @@ -63,7 +60,6 @@ block0(v0: i64, v1: i64): ; block0: ; movq %rdi, %rax ; cqo %rax, %rdx -; movq %rdx, %rcx ; idiv %rax, %rdx, %rsi, %rax, %rdx ; movq %rbp, %rsp ; popq %rbp diff --git a/cranelift/filetests/filetests/isa/x64/select-i128.clif b/cranelift/filetests/filetests/isa/x64/select-i128.clif index c88e3c3c2a..14dd0a352f 100644 --- 
a/cranelift/filetests/filetests/isa/x64/select-i128.clif +++ b/cranelift/filetests/filetests/isa/x64/select-i128.clif @@ -14,10 +14,11 @@ block0(v0: i32, v1: i128, v2: i128): ; movq %rsp, %rbp ; block0: ; cmpl $42, %edi -; cmovzq %rsi, %rcx, %rcx -; cmovzq %rdx, %r8, %r8 ; movq %rcx, %rax -; movq %r8, %rdx +; cmovzq %rsi, %rax, %rax +; movq %r8, %rcx +; cmovzq %rdx, %rcx, %rcx +; movq %rcx, %rdx ; movq %rbp, %rsp ; popq %rbp ; ret @@ -33,12 +34,12 @@ block0(v0: f32, v1: i128, v2: i128): ; movq %rsp, %rbp ; block0: ; ucomiss %xmm0, %xmm0 -; cmovnzq %rdx, %rdi, %rdi -; cmovpq %rdx, %rdi, %rdi -; cmovnzq %rcx, %rsi, %rsi -; cmovpq %rcx, %rsi, %rsi ; movq %rdi, %rax +; cmovnzq %rdx, %rax, %rax +; cmovpq %rdx, %rax, %rax ; movq %rsi, %rdx +; cmovnzq %rcx, %rdx, %rdx +; cmovpq %rcx, %rdx, %rdx ; movq %rbp, %rsp ; popq %rbp ; ret diff --git a/cranelift/filetests/filetests/isa/x64/simd-bitwise-compile.clif b/cranelift/filetests/filetests/isa/x64/simd-bitwise-compile.clif index f778d8e024..69141f4750 100644 --- a/cranelift/filetests/filetests/isa/x64/simd-bitwise-compile.clif +++ b/cranelift/filetests/filetests/isa/x64/simd-bitwise-compile.clif @@ -159,8 +159,9 @@ block0(v0: b16x8, v1: i16x8, v2: i16x8): ; pushq %rbp ; movq %rsp, %rbp ; block0: -; pblendvb %xmm2, %xmm1, %xmm2 -; movdqa %xmm2, %xmm0 +; movdqa %xmm2, %xmm6 +; pblendvb %xmm6, %xmm1, %xmm6 +; movdqa %xmm6, %xmm0 ; movq %rbp, %rsp ; popq %rbp ; ret @@ -174,8 +175,9 @@ block0(v0: b32x4, v1: f32x4, v2: f32x4): ; pushq %rbp ; movq %rsp, %rbp ; block0: -; blendvps %xmm2, %xmm1, %xmm2 -; movdqa %xmm2, %xmm0 +; movdqa %xmm2, %xmm6 +; blendvps %xmm6, %xmm1, %xmm6 +; movdqa %xmm6, %xmm0 ; movq %rbp, %rsp ; popq %rbp ; ret @@ -189,8 +191,9 @@ block0(v0: b64x2, v1: f64x2, v2: f64x2): ; pushq %rbp ; movq %rsp, %rbp ; block0: -; blendvpd %xmm2, %xmm1, %xmm2 -; movdqa %xmm2, %xmm0 +; movdqa %xmm2, %xmm6 +; blendvpd %xmm6, %xmm1, %xmm6 +; movdqa %xmm6, %xmm0 ; movq %rbp, %rsp ; popq %rbp ; ret @@ -206,12 +209,13 @@ block0(v0: 
i32): ; movq %rsp, %rbp ; block0: ; movdqu const(1), %xmm0 -; andq %rdi, $7, %rdi -; movd %edi, %xmm6 +; movq %rdi, %r11 +; andq %r11, $7, %r11 +; movd %r11d, %xmm6 ; psllw %xmm0, %xmm6, %xmm0 -; lea const(0), %rax -; shlq $4, %rdi, %rdi -; movdqu 0(%rax,%rdi,1), %xmm14 +; lea const(0), %rdi +; shlq $4, %r11, %r11 +; movdqu 0(%rdi,%r11,1), %xmm14 ; pand %xmm0, %xmm14, %xmm0 ; movq %rbp, %rsp ; popq %rbp @@ -252,12 +256,13 @@ block0(v0: i32): ; movq %rsp, %rbp ; block0: ; movdqu const(0), %xmm9 -; andq %rdi, $7, %rdi +; movq %rdi, %r10 +; andq %r10, $7, %r10 ; movdqa %xmm9, %xmm0 ; punpcklbw %xmm0, %xmm9, %xmm0 ; punpckhbw %xmm9, %xmm9, %xmm9 -; addl %edi, $8, %edi -; movd %edi, %xmm12 +; addl %r10d, $8, %r10d +; movd %r10d, %xmm12 ; psraw %xmm0, %xmm12, %xmm0 ; psraw %xmm9, %xmm12, %xmm9 ; packsswb %xmm0, %xmm9, %xmm0 @@ -279,14 +284,14 @@ block0(v0: i8x16, v1: i32): ; movdqa %xmm0, %xmm14 ; punpcklbw %xmm14, %xmm0, %xmm14 ; movdqa %xmm14, %xmm13 -; punpckhbw %xmm0, %xmm0, %xmm0 -; movdqa %xmm0, %xmm6 +; movdqa %xmm0, %xmm14 +; punpckhbw %xmm14, %xmm0, %xmm14 ; addl %r11d, $8, %r11d -; movd %r11d, %xmm14 +; movd %r11d, %xmm15 ; movdqa %xmm13, %xmm0 -; psraw %xmm0, %xmm14, %xmm0 -; psraw %xmm6, %xmm14, %xmm6 -; packsswb %xmm0, %xmm6, %xmm0 +; psraw %xmm0, %xmm15, %xmm0 +; psraw %xmm14, %xmm15, %xmm14 +; packsswb %xmm0, %xmm14, %xmm0 ; movq %rbp, %rsp ; popq %rbp ; ret diff --git a/cranelift/filetests/filetests/isa/x64/simd-logical-compile.clif b/cranelift/filetests/filetests/isa/x64/simd-logical-compile.clif index 2be24e4b18..40ab8cc76b 100644 --- a/cranelift/filetests/filetests/isa/x64/simd-logical-compile.clif +++ b/cranelift/filetests/filetests/isa/x64/simd-logical-compile.clif @@ -42,8 +42,9 @@ block0(v0: i64x2): ; movq %rsp, %rbp ; block0: ; pxor %xmm3, %xmm3, %xmm3 -; pcmpeqq %xmm0, %xmm3, %xmm0 -; ptest %xmm0, %xmm0 +; movdqa %xmm0, %xmm5 +; pcmpeqq %xmm5, %xmm3, %xmm5 +; ptest %xmm5, %xmm5 ; setz %al ; movq %rbp, %rsp ; popq %rbp diff --git 
a/cranelift/filetests/filetests/isa/x64/srem.clif b/cranelift/filetests/filetests/isa/x64/srem.clif index a176ef793c..131d7ac89f 100644 --- a/cranelift/filetests/filetests/isa/x64/srem.clif +++ b/cranelift/filetests/filetests/isa/x64/srem.clif @@ -10,9 +10,8 @@ block0(v0: i8, v1: i8): ; pushq %rbp ; movq %rsp, %rbp ; block0: -; xorl %r10d, %r10d, %r10d ; movq %rdi, %rax -; movq %r10, %rdx +; xorl %edx, %edx, %edx ; srem_seq %al, %dl, %sil, %al, %dl, tmp=(none) ; shrq $8, %rax, %rax ; movq %rbp, %rsp @@ -28,9 +27,8 @@ block0(v0: i16, v1: i16): ; pushq %rbp ; movq %rsp, %rbp ; block0: -; xorl %r10d, %r10d, %r10d ; movq %rdi, %rax -; movq %r10, %rdx +; xorl %edx, %edx, %edx ; srem_seq %ax, %dx, %si, %ax, %dx, tmp=(none) ; movq %rdx, %rax ; movq %rbp, %rsp @@ -46,9 +44,8 @@ block0(v0: i32, v1: i32): ; pushq %rbp ; movq %rsp, %rbp ; block0: -; xorl %r10d, %r10d, %r10d ; movq %rdi, %rax -; movq %r10, %rdx +; xorl %edx, %edx, %edx ; srem_seq %eax, %edx, %esi, %eax, %edx, tmp=(none) ; movq %rdx, %rax ; movq %rbp, %rsp @@ -64,9 +61,8 @@ block0(v0: i64, v1: i64): ; pushq %rbp ; movq %rsp, %rbp ; block0: -; xorl %r10d, %r10d, %r10d ; movq %rdi, %rax -; movq %r10, %rdx +; xorl %edx, %edx, %edx ; srem_seq %rax, %rdx, %rsi, %rax, %rdx, tmp=(none) ; movq %rdx, %rax ; movq %rbp, %rsp diff --git a/cranelift/filetests/filetests/isa/x64/sshr.clif b/cranelift/filetests/filetests/isa/x64/sshr.clif index b92e9c9001..50f88e0612 100644 --- a/cranelift/filetests/filetests/isa/x64/sshr.clif +++ b/cranelift/filetests/filetests/isa/x64/sshr.clif @@ -17,25 +17,26 @@ block0(v0: i128, v1: i8): ; movq %rsp, %rbp ; block0: ; movzbq %dl, %rcx -; shrq %cl, %rdi, %rdi -; movq %rsi, %rdx -; sarq %cl, %rdx, %rdx +; movq %rdi, %r8 +; shrq %cl, %r8, %r8 +; movq %rsi, %rdi +; sarq %cl, %rdi, %rdi ; movq %rcx, %rax ; movl $64, %ecx -; movq %rax, %r8 -; subq %rcx, %r8, %rcx +; movq %rax, %r9 +; subq %rcx, %r9, %rcx ; movq %rsi, %r11 ; shlq %cl, %r11, %r11 ; xorq %rax, %rax, %rax -; testq $127, %r8 +; testq 
$127, %r9 ; cmovzq %rax, %r11, %r11 -; orq %rdi, %r11, %rdi -; sarq $63, %rsi, %rsi -; testq $64, %r8 -; movq %rdx, %rax -; cmovzq %rdi, %rax, %rax -; cmovzq %rdx, %rsi, %rsi +; orq %r8, %r11, %r8 ; movq %rsi, %rdx +; sarq $63, %rdx, %rdx +; testq $64, %r9 +; movq %rdi, %rax +; cmovzq %r8, %rax, %rax +; cmovzq %rdi, %rdx, %rdx ; movq %rbp, %rsp ; popq %rbp ; ret @@ -50,23 +51,25 @@ block0(v0: i128, v1: i64): ; movq %rsp, %rbp ; block0: ; movq %rdx, %rcx -; shrq %cl, %rdi, %rdi +; movq %rdi, %r8 +; shrq %cl, %r8, %r8 ; movq %rsi, %r11 ; sarq %cl, %r11, %r11 ; movl $64, %ecx -; subq %rcx, %rdx, %rcx +; movq %rdx, %r9 +; subq %rcx, %r9, %rcx ; movq %rsi, %r10 ; shlq %cl, %r10, %r10 -; xorq %rax, %rax, %rax -; testq $127, %rdx -; cmovzq %rax, %r10, %r10 -; orq %rdi, %r10, %rdi -; sarq $63, %rsi, %rsi -; testq $64, %rdx -; movq %r11, %rax -; cmovzq %rdi, %rax, %rax -; cmovzq %r11, %rsi, %rsi +; xorq %rdi, %rdi, %rdi +; testq $127, %r9 +; cmovzq %rdi, %r10, %r10 +; orq %r8, %r10, %r8 ; movq %rsi, %rdx +; sarq $63, %rdx, %rdx +; testq $64, %r9 +; movq %r11, %rax +; cmovzq %r8, %rax, %rax +; cmovzq %r11, %rdx, %rdx ; movq %rbp, %rsp ; popq %rbp ; ret @@ -81,23 +84,25 @@ block0(v0: i128, v1: i32): ; movq %rsp, %rbp ; block0: ; movq %rdx, %rcx -; shrq %cl, %rdi, %rdi +; movq %rdi, %r8 +; shrq %cl, %r8, %r8 ; movq %rsi, %r11 ; sarq %cl, %r11, %r11 ; movl $64, %ecx -; subq %rcx, %rdx, %rcx +; movq %rdx, %r9 +; subq %rcx, %r9, %rcx ; movq %rsi, %r10 ; shlq %cl, %r10, %r10 -; xorq %rax, %rax, %rax -; testq $127, %rdx -; cmovzq %rax, %r10, %r10 -; orq %rdi, %r10, %rdi -; sarq $63, %rsi, %rsi -; testq $64, %rdx -; movq %r11, %rax -; cmovzq %rdi, %rax, %rax -; cmovzq %r11, %rsi, %rsi +; xorq %rdi, %rdi, %rdi +; testq $127, %r9 +; cmovzq %rdi, %r10, %r10 +; orq %r8, %r10, %r8 ; movq %rsi, %rdx +; sarq $63, %rdx, %rdx +; testq $64, %r9 +; movq %r11, %rax +; cmovzq %r8, %rax, %rax +; cmovzq %r11, %rdx, %rdx ; movq %rbp, %rsp ; popq %rbp ; ret @@ -112,23 +117,25 @@ block0(v0: i128, v1: 
i16): ; movq %rsp, %rbp ; block0: ; movq %rdx, %rcx -; shrq %cl, %rdi, %rdi +; movq %rdi, %r8 +; shrq %cl, %r8, %r8 ; movq %rsi, %r11 ; sarq %cl, %r11, %r11 ; movl $64, %ecx -; subq %rcx, %rdx, %rcx +; movq %rdx, %r9 +; subq %rcx, %r9, %rcx ; movq %rsi, %r10 ; shlq %cl, %r10, %r10 -; xorq %rax, %rax, %rax -; testq $127, %rdx -; cmovzq %rax, %r10, %r10 -; orq %rdi, %r10, %rdi -; sarq $63, %rsi, %rsi -; testq $64, %rdx -; movq %r11, %rax -; cmovzq %rdi, %rax, %rax -; cmovzq %r11, %rsi, %rsi +; xorq %rdi, %rdi, %rdi +; testq $127, %r9 +; cmovzq %rdi, %r10, %r10 +; orq %r8, %r10, %r8 ; movq %rsi, %rdx +; sarq $63, %rdx, %rdx +; testq $64, %r9 +; movq %r11, %rax +; cmovzq %r8, %rax, %rax +; cmovzq %r11, %rdx, %rdx ; movq %rbp, %rsp ; popq %rbp ; ret @@ -143,23 +150,25 @@ block0(v0: i128, v1: i8): ; movq %rsp, %rbp ; block0: ; movq %rdx, %rcx -; shrq %cl, %rdi, %rdi +; movq %rdi, %r8 +; shrq %cl, %r8, %r8 ; movq %rsi, %r11 ; sarq %cl, %r11, %r11 ; movl $64, %ecx -; subq %rcx, %rdx, %rcx +; movq %rdx, %r9 +; subq %rcx, %r9, %rcx ; movq %rsi, %r10 ; shlq %cl, %r10, %r10 -; xorq %rax, %rax, %rax -; testq $127, %rdx -; cmovzq %rax, %r10, %r10 -; orq %rdi, %r10, %rdi -; sarq $63, %rsi, %rsi -; testq $64, %rdx -; movq %r11, %rax -; cmovzq %rdi, %rax, %rax -; cmovzq %r11, %rsi, %rsi +; xorq %rdi, %rdi, %rdi +; testq $127, %r9 +; cmovzq %rdi, %r10, %r10 +; orq %r8, %r10, %r8 ; movq %rsi, %rdx +; sarq $63, %rdx, %rdx +; testq $64, %r9 +; movq %r11, %rax +; cmovzq %r8, %rax, %rax +; cmovzq %r11, %rdx, %rdx ; movq %rbp, %rsp ; popq %rbp ; ret @@ -174,8 +183,8 @@ block0(v0: i64, v1: i128): ; movq %rsp, %rbp ; block0: ; movq %rsi, %rcx -; sarq %cl, %rdi, %rdi ; movq %rdi, %rax +; sarq %cl, %rax, %rax ; movq %rbp, %rsp ; popq %rbp ; ret @@ -190,8 +199,8 @@ block0(v0: i32, v1: i128): ; movq %rsp, %rbp ; block0: ; movq %rsi, %rcx -; sarl %cl, %edi, %edi ; movq %rdi, %rax +; sarl %cl, %eax, %eax ; movq %rbp, %rsp ; popq %rbp ; ret @@ -207,8 +216,8 @@ block0(v0: i16, v1: i128): ; block0: 
; movq %rsi, %rcx ; andq %rcx, $15, %rcx -; sarw %cl, %di, %di ; movq %rdi, %rax +; sarw %cl, %ax, %ax ; movq %rbp, %rsp ; popq %rbp ; ret @@ -224,8 +233,8 @@ block0(v0: i8, v1: i128): ; block0: ; movq %rsi, %rcx ; andq %rcx, $7, %rcx -; sarb %cl, %dil, %dil ; movq %rdi, %rax +; sarb %cl, %al, %al ; movq %rbp, %rsp ; popq %rbp ; ret @@ -240,8 +249,8 @@ block0(v0: i64, v1: i64): ; movq %rsp, %rbp ; block0: ; movq %rsi, %rcx -; sarq %cl, %rdi, %rdi ; movq %rdi, %rax +; sarq %cl, %rax, %rax ; movq %rbp, %rsp ; popq %rbp ; ret @@ -256,8 +265,8 @@ block0(v0: i64, v1: i32): ; movq %rsp, %rbp ; block0: ; movq %rsi, %rcx -; sarq %cl, %rdi, %rdi ; movq %rdi, %rax +; sarq %cl, %rax, %rax ; movq %rbp, %rsp ; popq %rbp ; ret @@ -272,8 +281,8 @@ block0(v0: i64, v1: i16): ; movq %rsp, %rbp ; block0: ; movq %rsi, %rcx -; sarq %cl, %rdi, %rdi ; movq %rdi, %rax +; sarq %cl, %rax, %rax ; movq %rbp, %rsp ; popq %rbp ; ret @@ -288,8 +297,8 @@ block0(v0: i64, v1: i8): ; movq %rsp, %rbp ; block0: ; movq %rsi, %rcx -; sarq %cl, %rdi, %rdi ; movq %rdi, %rax +; sarq %cl, %rax, %rax ; movq %rbp, %rsp ; popq %rbp ; ret @@ -304,8 +313,8 @@ block0(v0: i32, v1: i64): ; movq %rsp, %rbp ; block0: ; movq %rsi, %rcx -; sarl %cl, %edi, %edi ; movq %rdi, %rax +; sarl %cl, %eax, %eax ; movq %rbp, %rsp ; popq %rbp ; ret @@ -320,8 +329,8 @@ block0(v0: i32, v1: i32): ; movq %rsp, %rbp ; block0: ; movq %rsi, %rcx -; sarl %cl, %edi, %edi ; movq %rdi, %rax +; sarl %cl, %eax, %eax ; movq %rbp, %rsp ; popq %rbp ; ret @@ -336,8 +345,8 @@ block0(v0: i32, v1: i16): ; movq %rsp, %rbp ; block0: ; movq %rsi, %rcx -; sarl %cl, %edi, %edi ; movq %rdi, %rax +; sarl %cl, %eax, %eax ; movq %rbp, %rsp ; popq %rbp ; ret @@ -352,8 +361,8 @@ block0(v0: i32, v1: i8): ; movq %rsp, %rbp ; block0: ; movq %rsi, %rcx -; sarl %cl, %edi, %edi ; movq %rdi, %rax +; sarl %cl, %eax, %eax ; movq %rbp, %rsp ; popq %rbp ; ret @@ -369,8 +378,8 @@ block0(v0: i16, v1: i64): ; block0: ; movq %rsi, %rcx ; andq %rcx, $15, %rcx -; sarw %cl, %di, 
%di ; movq %rdi, %rax +; sarw %cl, %ax, %ax ; movq %rbp, %rsp ; popq %rbp ; ret @@ -386,8 +395,8 @@ block0(v0: i16, v1: i32): ; block0: ; movq %rsi, %rcx ; andq %rcx, $15, %rcx -; sarw %cl, %di, %di ; movq %rdi, %rax +; sarw %cl, %ax, %ax ; movq %rbp, %rsp ; popq %rbp ; ret @@ -403,8 +412,8 @@ block0(v0: i16, v1: i16): ; block0: ; movq %rsi, %rcx ; andq %rcx, $15, %rcx -; sarw %cl, %di, %di ; movq %rdi, %rax +; sarw %cl, %ax, %ax ; movq %rbp, %rsp ; popq %rbp ; ret @@ -420,8 +429,8 @@ block0(v0: i16, v1: i8): ; block0: ; movq %rsi, %rcx ; andq %rcx, $15, %rcx -; sarw %cl, %di, %di ; movq %rdi, %rax +; sarw %cl, %ax, %ax ; movq %rbp, %rsp ; popq %rbp ; ret @@ -437,8 +446,8 @@ block0(v0: i8, v1: i64): ; block0: ; movq %rsi, %rcx ; andq %rcx, $7, %rcx -; sarb %cl, %dil, %dil ; movq %rdi, %rax +; sarb %cl, %al, %al ; movq %rbp, %rsp ; popq %rbp ; ret @@ -454,8 +463,8 @@ block0(v0: i8, v1: i32): ; block0: ; movq %rsi, %rcx ; andq %rcx, $7, %rcx -; sarb %cl, %dil, %dil ; movq %rdi, %rax +; sarb %cl, %al, %al ; movq %rbp, %rsp ; popq %rbp ; ret @@ -471,8 +480,8 @@ block0(v0: i8, v1: i16): ; block0: ; movq %rsi, %rcx ; andq %rcx, $7, %rcx -; sarb %cl, %dil, %dil ; movq %rdi, %rax +; sarb %cl, %al, %al ; movq %rbp, %rsp ; popq %rbp ; ret @@ -488,13 +497,12 @@ block0(v0: i8, v1: i8): ; block0: ; movq %rsi, %rcx ; andq %rcx, $7, %rcx -; sarb %cl, %dil, %dil ; movq %rdi, %rax +; sarb %cl, %al, %al ; movq %rbp, %rsp ; popq %rbp ; ret - function %sshr_i64_const(i64) -> i64 { block0(v0: i64): v1 = sshr_imm.i64 v0, 65 @@ -504,8 +512,8 @@ block0(v0: i64): ; pushq %rbp ; movq %rsp, %rbp ; block0: -; sarq $1, %rdi, %rdi ; movq %rdi, %rax +; sarq $1, %rax, %rax ; movq %rbp, %rsp ; popq %rbp ; ret @@ -519,8 +527,8 @@ block0(v0: i32): ; pushq %rbp ; movq %rsp, %rbp ; block0: -; sarl $1, %edi, %edi ; movq %rdi, %rax +; sarl $1, %eax, %eax ; movq %rbp, %rsp ; popq %rbp ; ret @@ -534,8 +542,8 @@ block0(v0: i16): ; pushq %rbp ; movq %rsp, %rbp ; block0: -; sarw $1, %di, %di ; movq %rdi, 
%rax +; sarw $1, %ax, %ax ; movq %rbp, %rsp ; popq %rbp ; ret @@ -549,8 +557,8 @@ block0(v0: i8): ; pushq %rbp ; movq %rsp, %rbp ; block0: -; sarb $1, %dil, %dil ; movq %rdi, %rax +; sarb $1, %al, %al ; movq %rbp, %rsp ; popq %rbp ; ret diff --git a/cranelift/filetests/filetests/isa/x64/struct-arg.clif b/cranelift/filetests/filetests/isa/x64/struct-arg.clif index aac589b2ce..e4c5363071 100644 --- a/cranelift/filetests/filetests/isa/x64/struct-arg.clif +++ b/cranelift/filetests/filetests/isa/x64/struct-arg.clif @@ -46,14 +46,13 @@ block0(v0: i64): ; pushq %rbp ; movq %rsp, %rbp ; block0: -; movq %rdi, %r8 +; movq %rdi, %rsi ; subq %rsp, $64, %rsp ; virtual_sp_offset_adjust 64 ; lea 0(%rsp), %rdi -; movq %r8, %rsi ; movl $64, %edx -; load_ext_name %Memcpy+0, %rcx -; call *%rcx +; load_ext_name %Memcpy+0, %rax +; call *%rax ; call User(userextname0) ; addq %rsp, $64, %rsp ; virtual_sp_offset_adjust -64 @@ -72,20 +71,20 @@ block0(v0: i64, v1: i64): ; pushq %rbp ; movq %rsp, %rbp ; subq %rsp, $16, %rsp -; movq %r12, 0(%rsp) +; movq %r14, 0(%rsp) ; block0: -; movq %rdi, %r12 +; movq %rdi, %r14 ; subq %rsp, $64, %rsp ; virtual_sp_offset_adjust 64 ; lea 0(%rsp), %rdi ; movl $64, %edx ; load_ext_name %Memcpy+0, %rcx ; call *%rcx -; movq %r12, %rdi +; movq %r14, %rdi ; call User(userextname0) ; addq %rsp, $64, %rsp ; virtual_sp_offset_adjust -64 -; movq 0(%rsp), %r12 +; movq 0(%rsp), %r14 ; addq %rsp, $16, %rsp ; movq %rbp, %rsp ; popq %rbp @@ -122,28 +121,29 @@ block0(v0: i64, v1: i64, v2: i64): ; pushq %rbp ; movq %rsp, %rbp ; subq %rsp, $16, %rsp -; movq %rbx, 0(%rsp) -; movq %r14, 8(%rsp) +; movq %r13, 0(%rsp) +; movq %r15, 8(%rsp) ; block0: -; movq %rdx, %rbx -; movq %rdi, %r14 +; movq %rdx, %r15 +; movq %rdi, %r13 ; subq %rsp, $192, %rsp ; virtual_sp_offset_adjust 192 ; lea 0(%rsp), %rdi ; movl $128, %edx -; load_ext_name %Memcpy+0, %rcx -; call *%rcx +; load_ext_name %Memcpy+0, %r8 +; call *%r8 ; lea 128(%rsp), %rdi -; movq %rbx, %rsi +; movq %r15, %rsi ; movl $64, 
%edx -; load_ext_name %Memcpy+0, %rcx -; call *%rcx -; movq %r14, %rdi +; load_ext_name %Memcpy+0, %r8 +; movq %r15, %rsi +; call *%r8 +; movq %r13, %rdi ; call User(userextname0) ; addq %rsp, $192, %rsp ; virtual_sp_offset_adjust -192 -; movq 0(%rsp), %rbx -; movq 8(%rsp), %r14 +; movq 0(%rsp), %r13 +; movq 8(%rsp), %r15 ; addq %rsp, $16, %rsp ; movq %rbp, %rsp ; popq %rbp diff --git a/cranelift/filetests/filetests/isa/x64/struct-ret.clif b/cranelift/filetests/filetests/isa/x64/struct-ret.clif index 788a173c1a..b1fa7ede30 100644 --- a/cranelift/filetests/filetests/isa/x64/struct-ret.clif +++ b/cranelift/filetests/filetests/isa/x64/struct-ret.clif @@ -47,15 +47,10 @@ block0(v0: i64): ; pushq %rbp ; movq %rsp, %rbp -; subq %rsp, $16, %rsp -; movq %r15, 0(%rsp) ; block0: -; movq %rdi, %r15 -; load_ext_name %f4+0, %r8 -; call *%r8 -; movq %r15, %rax -; movq 0(%rsp), %r15 -; addq %rsp, $16, %rsp +; movq %rdi, %rax +; load_ext_name %f4+0, %rdx +; call *%rdx ; movq %rbp, %rsp ; popq %rbp ; ret diff --git a/cranelift/filetests/filetests/isa/x64/traps.clif b/cranelift/filetests/filetests/isa/x64/traps.clif index c1697b987f..fcde18bed7 100644 --- a/cranelift/filetests/filetests/isa/x64/traps.clif +++ b/cranelift/filetests/filetests/isa/x64/traps.clif @@ -22,7 +22,8 @@ block0(v0: i64, v1: i64): ; pushq %rbp ; movq %rsp, %rbp ; block0: -; addq %rdi, %rsi, %rdi +; movq %rdi, %rcx +; addq %rcx, %rsi, %rcx ; jnb ; ud2 user0 ; ; movq %rbp, %rsp ; popq %rbp diff --git a/cranelift/filetests/filetests/isa/x64/trunc-libcall.clif b/cranelift/filetests/filetests/isa/x64/trunc-libcall.clif index 226388a1cb..5256edb2f1 100644 --- a/cranelift/filetests/filetests/isa/x64/trunc-libcall.clif +++ b/cranelift/filetests/filetests/isa/x64/trunc-libcall.clif @@ -10,8 +10,8 @@ block0(v0: f32): ; pushq %rbp ; movq %rsp, %rbp ; block0: -; load_ext_name %TruncF32+0, %r8 -; call *%r8 +; load_ext_name %TruncF32+0, %rdx +; call *%rdx ; movq %rbp, %rsp ; popq %rbp ; ret @@ -25,8 +25,8 @@ block0(v0: f64): 
; pushq %rbp ; movq %rsp, %rbp ; block0: -; load_ext_name %TruncF64+0, %r8 -; call *%r8 +; load_ext_name %TruncF64+0, %rdx +; call *%rdx ; movq %rbp, %rsp ; popq %rbp ; ret diff --git a/cranelift/filetests/filetests/isa/x64/udiv.clif b/cranelift/filetests/filetests/isa/x64/udiv.clif index 46dae76eab..71ad6b75ed 100644 --- a/cranelift/filetests/filetests/isa/x64/udiv.clif +++ b/cranelift/filetests/filetests/isa/x64/udiv.clif @@ -10,8 +10,7 @@ block0(v0: i8, v1: i8): ; pushq %rbp ; movq %rsp, %rbp ; block0: -; movzbl %dil, %r9d -; movq %r9, %rax +; movzbl %dil, %eax ; div %al, (none), %sil, %al, (none) ; movq %rbp, %rsp ; popq %rbp @@ -26,9 +25,8 @@ block0(v0: i16, v1: i16): ; pushq %rbp ; movq %rsp, %rbp ; block0: -; movl $0, %r10d ; movq %rdi, %rax -; movq %r10, %rdx +; movl $0, %edx ; div %ax, %dx, %si, %ax, %dx ; movq %rbp, %rsp ; popq %rbp @@ -43,9 +41,8 @@ block0(v0: i32, v1: i32): ; pushq %rbp ; movq %rsp, %rbp ; block0: -; movl $0, %r10d ; movq %rdi, %rax -; movq %r10, %rdx +; movl $0, %edx ; div %eax, %edx, %esi, %eax, %edx ; movq %rbp, %rsp ; popq %rbp @@ -60,9 +57,8 @@ block0(v0: i64, v1: i64): ; pushq %rbp ; movq %rsp, %rbp ; block0: -; movl $0, %r10d ; movq %rdi, %rax -; movq %r10, %rdx +; movl $0, %edx ; div %rax, %rdx, %rsi, %rax, %rdx ; movq %rbp, %rsp ; popq %rbp diff --git a/cranelift/filetests/filetests/isa/x64/uextend-elision.clif b/cranelift/filetests/filetests/isa/x64/uextend-elision.clif index 1f88ad6538..7ffc50c086 100644 --- a/cranelift/filetests/filetests/isa/x64/uextend-elision.clif +++ b/cranelift/filetests/filetests/isa/x64/uextend-elision.clif @@ -11,8 +11,8 @@ block0(v0: i32, v1: i32): ; pushq %rbp ; movq %rsp, %rbp ; block0: -; addl %edi, %esi, %edi ; movq %rdi, %rax +; addl %eax, %esi, %eax ; movq %rbp, %rsp ; popq %rbp ; ret diff --git a/cranelift/filetests/filetests/isa/x64/umax-bug.clif b/cranelift/filetests/filetests/isa/x64/umax-bug.clif index f0272041c4..63ea9b4c6e 100644 --- a/cranelift/filetests/filetests/isa/x64/umax-bug.clif 
+++ b/cranelift/filetests/filetests/isa/x64/umax-bug.clif @@ -13,8 +13,8 @@ block0(v1: i32, v2: i64): ; block0: ; movl 0(%rsi), %r8d ; cmpl %edi, %r8d -; cmovnbl %r8d, %edi, %edi ; movq %rdi, %rax +; cmovnbl %r8d, %eax, %eax ; movq %rbp, %rsp ; popq %rbp ; ret diff --git a/cranelift/filetests/filetests/isa/x64/urem.clif b/cranelift/filetests/filetests/isa/x64/urem.clif index b6ba479bae..d89984faba 100644 --- a/cranelift/filetests/filetests/isa/x64/urem.clif +++ b/cranelift/filetests/filetests/isa/x64/urem.clif @@ -10,8 +10,7 @@ block0(v0: i8, v1: i8): ; pushq %rbp ; movq %rsp, %rbp ; block0: -; movzbl %dil, %r9d -; movq %r9, %rax +; movzbl %dil, %eax ; div %al, (none), %sil, %al, (none) ; shrq $8, %rax, %rax ; movq %rbp, %rsp @@ -27,9 +26,8 @@ block0(v0: i16, v1: i16): ; pushq %rbp ; movq %rsp, %rbp ; block0: -; movl $0, %r10d ; movq %rdi, %rax -; movq %r10, %rdx +; movl $0, %edx ; div %ax, %dx, %si, %ax, %dx ; movq %rdx, %rax ; movq %rbp, %rsp @@ -45,9 +43,8 @@ block0(v0: i32, v1: i32): ; pushq %rbp ; movq %rsp, %rbp ; block0: -; movl $0, %r10d ; movq %rdi, %rax -; movq %r10, %rdx +; movl $0, %edx ; div %eax, %edx, %esi, %eax, %edx ; movq %rdx, %rax ; movq %rbp, %rsp @@ -63,9 +60,8 @@ block0(v0: i64, v1: i64): ; pushq %rbp ; movq %rsp, %rbp ; block0: -; movl $0, %r10d ; movq %rdi, %rax -; movq %r10, %rdx +; movl $0, %edx ; div %rax, %rdx, %rsi, %rax, %rdx ; movq %rdx, %rax ; movq %rbp, %rsp diff --git a/cranelift/filetests/filetests/isa/x64/ushr.clif b/cranelift/filetests/filetests/isa/x64/ushr.clif index b7b5551ade..401e6c9265 100644 --- a/cranelift/filetests/filetests/isa/x64/ushr.clif +++ b/cranelift/filetests/filetests/isa/x64/ushr.clif @@ -16,22 +16,24 @@ block0(v0: i128, v1: i8): ; movq %rsp, %rbp ; block0: ; movzbq %dl, %rcx -; shrq %cl, %rdi, %rdi +; movq %rdi, %r10 +; shrq %cl, %r10, %r10 ; movq %rsi, %r8 ; shrq %cl, %r8, %r8 -; movq %rcx, %rax +; movq %rcx, %rdi ; movl $64, %ecx -; movq %rax, %r9 +; movq %rdi, %r9 ; subq %rcx, %r9, %rcx -; shlq %cl, 
%rsi, %rsi +; movq %rsi, %rdi +; shlq %cl, %rdi, %rdi ; xorq %r11, %r11, %r11 ; testq $127, %r9 -; cmovzq %r11, %rsi, %rsi -; orq %rsi, %rdi, %rsi +; cmovzq %r11, %rdi, %rdi +; orq %rdi, %r10, %rdi ; xorq %rdx, %rdx, %rdx ; testq $64, %r9 ; movq %r8, %rax -; cmovzq %rsi, %rax, %rax +; cmovzq %rdi, %rax, %rax ; cmovzq %r8, %rdx, %rdx ; movq %rbp, %rsp ; popq %rbp @@ -47,7 +49,8 @@ block0(v0: i128, v1: i64): ; movq %rsp, %rbp ; block0: ; movq %rdx, %rcx -; shrq %cl, %rdi, %rdi +; movq %rdi, %r9 +; shrq %cl, %r9, %r9 ; movq %rsi, %r11 ; shrq %cl, %r11, %r11 ; movl $64, %ecx @@ -57,7 +60,7 @@ block0(v0: i128, v1: i64): ; xorq %r10, %r10, %r10 ; testq $127, %r8 ; cmovzq %r10, %rsi, %rsi -; orq %rsi, %rdi, %rsi +; orq %rsi, %r9, %rsi ; xorq %rdx, %rdx, %rdx ; testq $64, %r8 ; movq %r11, %rax @@ -77,7 +80,8 @@ block0(v0: i128, v1: i32): ; movq %rsp, %rbp ; block0: ; movq %rdx, %rcx -; shrq %cl, %rdi, %rdi +; movq %rdi, %r9 +; shrq %cl, %r9, %r9 ; movq %rsi, %r11 ; shrq %cl, %r11, %r11 ; movl $64, %ecx @@ -87,7 +91,7 @@ block0(v0: i128, v1: i32): ; xorq %r10, %r10, %r10 ; testq $127, %r8 ; cmovzq %r10, %rsi, %rsi -; orq %rsi, %rdi, %rsi +; orq %rsi, %r9, %rsi ; xorq %rdx, %rdx, %rdx ; testq $64, %r8 ; movq %r11, %rax @@ -107,7 +111,8 @@ block0(v0: i128, v1: i16): ; movq %rsp, %rbp ; block0: ; movq %rdx, %rcx -; shrq %cl, %rdi, %rdi +; movq %rdi, %r9 +; shrq %cl, %r9, %r9 ; movq %rsi, %r11 ; shrq %cl, %r11, %r11 ; movl $64, %ecx @@ -117,7 +122,7 @@ block0(v0: i128, v1: i16): ; xorq %r10, %r10, %r10 ; testq $127, %r8 ; cmovzq %r10, %rsi, %rsi -; orq %rsi, %rdi, %rsi +; orq %rsi, %r9, %rsi ; xorq %rdx, %rdx, %rdx ; testq $64, %r8 ; movq %r11, %rax @@ -137,7 +142,8 @@ block0(v0: i128, v1: i8): ; movq %rsp, %rbp ; block0: ; movq %rdx, %rcx -; shrq %cl, %rdi, %rdi +; movq %rdi, %r9 +; shrq %cl, %r9, %r9 ; movq %rsi, %r11 ; shrq %cl, %r11, %r11 ; movl $64, %ecx @@ -147,7 +153,7 @@ block0(v0: i128, v1: i8): ; xorq %r10, %r10, %r10 ; testq $127, %r8 ; cmovzq %r10, %rsi, %rsi -; orq 
%rsi, %rdi, %rsi +; orq %rsi, %r9, %rsi ; xorq %rdx, %rdx, %rdx ; testq $64, %r8 ; movq %r11, %rax @@ -167,8 +173,8 @@ block0(v0: i64, v1: i128): ; movq %rsp, %rbp ; block0: ; movq %rsi, %rcx -; shrq %cl, %rdi, %rdi ; movq %rdi, %rax +; shrq %cl, %rax, %rax ; movq %rbp, %rsp ; popq %rbp ; ret @@ -184,8 +190,8 @@ block0(v0: i32, v1: i64, v2: i64): ; movq %rsp, %rbp ; block0: ; movq %rsi, %rcx -; shrl %cl, %edi, %edi ; movq %rdi, %rax +; shrl %cl, %eax, %eax ; movq %rbp, %rsp ; popq %rbp ; ret @@ -201,8 +207,8 @@ block0(v0: i16, v1: i128): ; block0: ; movq %rsi, %rcx ; andq %rcx, $15, %rcx -; shrw %cl, %di, %di ; movq %rdi, %rax +; shrw %cl, %ax, %ax ; movq %rbp, %rsp ; popq %rbp ; ret @@ -218,8 +224,8 @@ block0(v0: i8, v1: i128): ; block0: ; movq %rsi, %rcx ; andq %rcx, $7, %rcx -; shrb %cl, %dil, %dil ; movq %rdi, %rax +; shrb %cl, %al, %al ; movq %rbp, %rsp ; popq %rbp ; ret @@ -234,8 +240,8 @@ block0(v0: i64, v1: i64): ; movq %rsp, %rbp ; block0: ; movq %rsi, %rcx -; shrq %cl, %rdi, %rdi ; movq %rdi, %rax +; shrq %cl, %rax, %rax ; movq %rbp, %rsp ; popq %rbp ; ret @@ -250,8 +256,8 @@ block0(v0: i64, v1: i32): ; movq %rsp, %rbp ; block0: ; movq %rsi, %rcx -; shrq %cl, %rdi, %rdi ; movq %rdi, %rax +; shrq %cl, %rax, %rax ; movq %rbp, %rsp ; popq %rbp ; ret @@ -266,8 +272,8 @@ block0(v0: i64, v1: i16): ; movq %rsp, %rbp ; block0: ; movq %rsi, %rcx -; shrq %cl, %rdi, %rdi ; movq %rdi, %rax +; shrq %cl, %rax, %rax ; movq %rbp, %rsp ; popq %rbp ; ret @@ -282,8 +288,8 @@ block0(v0: i64, v1: i8): ; movq %rsp, %rbp ; block0: ; movq %rsi, %rcx -; shrq %cl, %rdi, %rdi ; movq %rdi, %rax +; shrq %cl, %rax, %rax ; movq %rbp, %rsp ; popq %rbp ; ret @@ -298,8 +304,8 @@ block0(v0: i32, v1: i64): ; movq %rsp, %rbp ; block0: ; movq %rsi, %rcx -; shrl %cl, %edi, %edi ; movq %rdi, %rax +; shrl %cl, %eax, %eax ; movq %rbp, %rsp ; popq %rbp ; ret @@ -314,8 +320,8 @@ block0(v0: i32, v1: i32): ; movq %rsp, %rbp ; block0: ; movq %rsi, %rcx -; shrl %cl, %edi, %edi ; movq %rdi, %rax +; shrl 
%cl, %eax, %eax ; movq %rbp, %rsp ; popq %rbp ; ret @@ -330,8 +336,8 @@ block0(v0: i32, v1: i16): ; movq %rsp, %rbp ; block0: ; movq %rsi, %rcx -; shrl %cl, %edi, %edi ; movq %rdi, %rax +; shrl %cl, %eax, %eax ; movq %rbp, %rsp ; popq %rbp ; ret @@ -346,8 +352,8 @@ block0(v0: i32, v1: i8): ; movq %rsp, %rbp ; block0: ; movq %rsi, %rcx -; shrl %cl, %edi, %edi ; movq %rdi, %rax +; shrl %cl, %eax, %eax ; movq %rbp, %rsp ; popq %rbp ; ret @@ -363,8 +369,8 @@ block0(v0: i16, v1: i64): ; block0: ; movq %rsi, %rcx ; andq %rcx, $15, %rcx -; shrw %cl, %di, %di ; movq %rdi, %rax +; shrw %cl, %ax, %ax ; movq %rbp, %rsp ; popq %rbp ; ret @@ -380,8 +386,8 @@ block0(v0: i16, v1: i32): ; block0: ; movq %rsi, %rcx ; andq %rcx, $15, %rcx -; shrw %cl, %di, %di ; movq %rdi, %rax +; shrw %cl, %ax, %ax ; movq %rbp, %rsp ; popq %rbp ; ret @@ -397,8 +403,8 @@ block0(v0: i16, v1: i16): ; block0: ; movq %rsi, %rcx ; andq %rcx, $15, %rcx -; shrw %cl, %di, %di ; movq %rdi, %rax +; shrw %cl, %ax, %ax ; movq %rbp, %rsp ; popq %rbp ; ret @@ -414,8 +420,8 @@ block0(v0: i16, v1: i8): ; block0: ; movq %rsi, %rcx ; andq %rcx, $15, %rcx -; shrw %cl, %di, %di ; movq %rdi, %rax +; shrw %cl, %ax, %ax ; movq %rbp, %rsp ; popq %rbp ; ret @@ -431,8 +437,8 @@ block0(v0: i8, v1: i64): ; block0: ; movq %rsi, %rcx ; andq %rcx, $7, %rcx -; shrb %cl, %dil, %dil ; movq %rdi, %rax +; shrb %cl, %al, %al ; movq %rbp, %rsp ; popq %rbp ; ret @@ -448,8 +454,8 @@ block0(v0: i8, v1: i32): ; block0: ; movq %rsi, %rcx ; andq %rcx, $7, %rcx -; shrb %cl, %dil, %dil ; movq %rdi, %rax +; shrb %cl, %al, %al ; movq %rbp, %rsp ; popq %rbp ; ret @@ -465,8 +471,8 @@ block0(v0: i8, v1: i16): ; block0: ; movq %rsi, %rcx ; andq %rcx, $7, %rcx -; shrb %cl, %dil, %dil ; movq %rdi, %rax +; shrb %cl, %al, %al ; movq %rbp, %rsp ; popq %rbp ; ret @@ -482,14 +488,12 @@ block0(v0: i8, v1: i8): ; block0: ; movq %rsi, %rcx ; andq %rcx, $7, %rcx -; shrb %cl, %dil, %dil ; movq %rdi, %rax +; shrb %cl, %al, %al ; movq %rbp, %rsp ; popq %rbp ; ret 
- - function %ushr_i64_const(i64) -> i64 { block0(v0: i64): v1 = ushr_imm.i64 v0, 65 @@ -499,8 +503,8 @@ block0(v0: i64): ; pushq %rbp ; movq %rsp, %rbp ; block0: -; shrq $1, %rdi, %rdi ; movq %rdi, %rax +; shrq $1, %rax, %rax ; movq %rbp, %rsp ; popq %rbp ; ret @@ -514,8 +518,8 @@ block0(v0: i32): ; pushq %rbp ; movq %rsp, %rbp ; block0: -; shrl $1, %edi, %edi ; movq %rdi, %rax +; shrl $1, %eax, %eax ; movq %rbp, %rsp ; popq %rbp ; ret @@ -529,8 +533,8 @@ block0(v0: i16): ; pushq %rbp ; movq %rsp, %rbp ; block0: -; shrw $1, %di, %di ; movq %rdi, %rax +; shrw $1, %ax, %ax ; movq %rbp, %rsp ; popq %rbp ; ret @@ -544,8 +548,8 @@ block0(v0: i8): ; pushq %rbp ; movq %rsp, %rbp ; block0: -; shrb $1, %dil, %dil ; movq %rdi, %rax +; shrb $1, %al, %al ; movq %rbp, %rsp ; popq %rbp ; ret diff --git a/cranelift/filetests/filetests/isa/x64/uunarrow.clif b/cranelift/filetests/filetests/isa/x64/uunarrow.clif index 6b3a1bcce9..facc115bd1 100644 --- a/cranelift/filetests/filetests/isa/x64/uunarrow.clif +++ b/cranelift/filetests/filetests/isa/x64/uunarrow.clif @@ -13,10 +13,11 @@ block0(v0: f64x2): ; movq %rsp, %rbp ; block0: ; xorpd %xmm3, %xmm3, %xmm3 -; maxpd %xmm0, %xmm3, %xmm0 -; movupd const(0), %xmm7 -; minpd %xmm0, %xmm7, %xmm0 -; roundpd $3, %xmm0, %xmm0 +; movdqa %xmm0, %xmm7 +; maxpd %xmm7, %xmm3, %xmm7 +; movupd const(0), %xmm8 +; minpd %xmm7, %xmm8, %xmm7 +; roundpd $3, %xmm7, %xmm0 ; movupd const(1), %xmm13 ; addpd %xmm0, %xmm13, %xmm0 ; shufps $136, %xmm0, %xmm3, %xmm0 diff --git a/cranelift/filetests/filetests/isa/x64/vhigh_bits.clif b/cranelift/filetests/filetests/isa/x64/vhigh_bits.clif index 185cb62764..538d6ddcd8 100644 --- a/cranelift/filetests/filetests/isa/x64/vhigh_bits.clif +++ b/cranelift/filetests/filetests/isa/x64/vhigh_bits.clif @@ -38,8 +38,9 @@ block0(v0: i16x8): ; pushq %rbp ; movq %rsp, %rbp ; block0: -; packsswb %xmm0, %xmm0, %xmm0 -; pmovmskb %xmm0, %eax +; movdqa %xmm0, %xmm3 +; packsswb %xmm3, %xmm0, %xmm3 +; pmovmskb %xmm3, %eax ; shrq $8, 
%rax, %rax ; movq %rbp, %rsp ; popq %rbp diff --git a/cranelift/filetests/filetests/isa/x64/widening.clif b/cranelift/filetests/filetests/isa/x64/widening.clif index 202a6d4389..9157cbff01 100644 --- a/cranelift/filetests/filetests/isa/x64/widening.clif +++ b/cranelift/filetests/filetests/isa/x64/widening.clif @@ -52,8 +52,9 @@ block0(v0: i8x16): ; pushq %rbp ; movq %rsp, %rbp ; block0: -; palignr $8, %xmm0, %xmm0, %xmm0 -; pmovsxbw %xmm0, %xmm0 +; movdqa %xmm0, %xmm3 +; palignr $8, %xmm3, %xmm0, %xmm3 +; pmovsxbw %xmm3, %xmm0 ; movq %rbp, %rsp ; popq %rbp ; ret @@ -67,8 +68,9 @@ block0(v0: i16x8): ; pushq %rbp ; movq %rsp, %rbp ; block0: -; palignr $8, %xmm0, %xmm0, %xmm0 -; pmovsxwd %xmm0, %xmm0 +; movdqa %xmm0, %xmm3 +; palignr $8, %xmm3, %xmm0, %xmm3 +; pmovsxwd %xmm3, %xmm0 ; movq %rbp, %rsp ; popq %rbp ; ret @@ -139,8 +141,9 @@ block0(v0: i8x16): ; pushq %rbp ; movq %rsp, %rbp ; block0: -; palignr $8, %xmm0, %xmm0, %xmm0 -; pmovzxbw %xmm0, %xmm0 +; movdqa %xmm0, %xmm3 +; palignr $8, %xmm3, %xmm0, %xmm3 +; pmovzxbw %xmm3, %xmm0 ; movq %rbp, %rsp ; popq %rbp ; ret @@ -154,8 +157,9 @@ block0(v0: i16x8): ; pushq %rbp ; movq %rsp, %rbp ; block0: -; palignr $8, %xmm0, %xmm0, %xmm0 -; pmovzxwd %xmm0, %xmm0 +; movdqa %xmm0, %xmm3 +; palignr $8, %xmm3, %xmm0, %xmm3 +; pmovzxwd %xmm3, %xmm0 ; movq %rbp, %rsp ; popq %rbp ; ret diff --git a/supply-chain/audits.toml b/supply-chain/audits.toml index 6a47bcbf31..a67e3b3e37 100644 --- a/supply-chain/audits.toml +++ b/supply-chain/audits.toml @@ -170,6 +170,12 @@ criteria = "safe-to-deploy" delta = "0.3.1 -> 0.3.2" notes = "The Bytecode Alliance is the author of this crate." +[[audits.regalloc2]] +who = "Chris Fallin " +criteria = "safe-to-deploy" +delta = "0.3.2 -> 0.4.0" +notes = "The Bytecode Alliance is the author of this crate." + [[audits.rustc-demangle]] who = "Alex Crichton " criteria = "safe-to-deploy"