diff --git a/cranelift/codegen/src/isa/x64/abi.rs b/cranelift/codegen/src/isa/x64/abi.rs
index 4bc22357fd..b74cb39cfc 100644
--- a/cranelift/codegen/src/isa/x64/abi.rs
+++ b/cranelift/codegen/src/isa/x64/abi.rs
@@ -325,10 +325,13 @@ impl ABIBody for X64ABIBody {
                     self.fp_to_arg_offset() + off <= u32::max_value() as i64,
                     "large offset nyi"
                 );
-                load_stack(
-                    Amode::imm_reg((self.fp_to_arg_offset() + off) as u32, regs::rbp()),
-                    to_reg,
+                let from_addr = Amode::imm_reg((self.fp_to_arg_offset() + off) as u32, regs::rbp());
+                Inst::load(
                     ty,
+                    from_addr,
+                    to_reg,
+                    ExtKind::ZeroExtend,
+                    /* infallible load */ None,
                 )
             }
         }
@@ -420,8 +423,10 @@ impl ABIBody for X64ABIBody {
                     "large stack return offset nyi"
                 );
-                let mem = Amode::imm_reg(off as u32, self.ret_area_ptr.unwrap().to_reg());
-                ret.push(store_stack(mem, from_reg.to_reg(), ty))
+                let from_reg = from_reg.to_reg();
+                let to_mem = Amode::imm_reg(off as u32, self.ret_area_ptr.unwrap().to_reg());
+                let store = Inst::store(ty, from_reg, to_mem, /* infallible store */ None);
+                ret.push(store)
             }
         }
@@ -464,17 +469,20 @@ impl ABIBody for X64ABIBody {
         unimplemented!("store_stackslot")
     }
 
-    fn load_spillslot(&self, slot: SpillSlot, ty: Type, into_reg: Writable<Reg>) -> Inst {
+    fn load_spillslot(&self, slot: SpillSlot, ty: Type, to_reg: Writable<Reg>) -> Inst {
         // Offset from beginning of spillslot area, which is at nominal-SP + stackslots_size.
         let islot = slot.get() as i64;
         let spill_off = islot * 8;
         let sp_off = self.stack_slots_size as i64 + spill_off;
         debug_assert!(sp_off <= u32::max_value() as i64, "large spill offsets NYI");
         trace!("load_spillslot: slot {:?} -> sp_off {}", slot, sp_off);
-        load_stack(
-            SyntheticAmode::nominal_sp_offset(sp_off as u32),
-            into_reg,
+        let from_addr = SyntheticAmode::nominal_sp_offset(sp_off as u32);
+        Inst::load(
             ty,
+            from_addr,
+            to_reg,
+            ExtKind::ZeroExtend,
+            /* infallible load */ None,
         )
     }
 
@@ -485,11 +493,8 @@ impl ABIBody for X64ABIBody {
         let sp_off = self.stack_slots_size as i64 + spill_off;
         debug_assert!(sp_off <= u32::max_value() as i64, "large spill offsets NYI");
         trace!("store_spillslot: slot {:?} -> sp_off {}", slot, sp_off);
-        store_stack(
-            SyntheticAmode::nominal_sp_offset(sp_off as u32),
-            from_reg,
-            ty,
-        )
+        let to_mem = SyntheticAmode::nominal_sp_offset(sp_off as u32);
+        Inst::store(ty, from_reg, to_mem, /* infallible store */ None)
     }
 
     fn spillslots_to_stack_map(&self, slots: &[SpillSlot], state: &EmitState) -> StackMap {
@@ -1003,66 +1008,6 @@ fn adjust_stack<C: LowerCtx<I = Inst>>(ctx: &mut C, amount: u64, is_sub: bool) {
     }
 }
 
-fn load_stack(mem: impl Into<SyntheticAmode>, into_reg: Writable<Reg>, ty: Type) -> Inst {
-    let (is_int, ext_mode) = match ty {
-        types::B1 | types::B8 | types::I8 => (true, Some(ExtMode::BQ)),
-        types::B16 | types::I16 => (true, Some(ExtMode::WQ)),
-        types::B32 | types::I32 => (true, Some(ExtMode::LQ)),
-        types::B64 | types::I64 | types::R64 => (true, None),
-        types::F32 | types::F64 => (false, None),
-        _ => panic!("load_stack({})", ty),
-    };
-
-    let mem = mem.into();
-
-    if is_int {
-        match ext_mode {
-            Some(ext_mode) => Inst::movsx_rm_r(
-                ext_mode,
-                RegMem::mem(mem),
-                into_reg,
-                /* infallible load */ None,
-            ),
-            None => Inst::mov64_m_r(mem, into_reg, None /* infallible */),
-        }
-    } else {
-        let sse_op = match ty {
-            types::F32 => SseOpcode::Movss,
-            types::F64 => SseOpcode::Movsd,
-            _ => unreachable!(),
-        };
-        Inst::xmm_mov(
-            sse_op,
-            RegMem::mem(mem),
-            into_reg,
-            None, /* infallible */
-        )
-    }
-}
-
-fn store_stack(mem: impl Into<SyntheticAmode>, from_reg: Reg, ty: Type) -> Inst {
-    let (is_int, size) = match ty {
-        types::B1 | types::B8 | types::I8 => (true, 1),
-        types::B16 | types::I16 => (true, 2),
-        types::B32 | types::I32 => (true, 4),
-        types::B64 | types::I64 | types::R64 => (true, 8),
-        types::F32 => (false, 4),
-        types::F64 => (false, 8),
-        _ => unimplemented!("store_stack({})", ty),
-    };
-    let mem = mem.into();
-    if is_int {
-        Inst::mov_r_m(size, from_reg, mem, /* infallible store */ None)
-    } else {
-        let sse_op = match size {
-            4 => SseOpcode::Movss,
-            8 => SseOpcode::Movsd,
-            _ => unreachable!(),
-        };
-        Inst::xmm_mov_r_m(sse_op, from_reg, mem, /* infallible store */ None)
-    }
-}
-
 /// X64 ABI object for a function call.
 pub struct X64ABICall {
     sig: ABISig,
@@ -1212,11 +1157,9 @@ impl ABICall for X64ABICall {
                 debug_assert!(off <= u32::max_value() as i64);
                 debug_assert!(off >= 0);
-                ctx.emit(store_stack(
-                    Amode::imm_reg(off as u32, regs::rsp()),
-                    from_reg,
-                    ty,
-                ))
+                let to_mem = Amode::imm_reg(off as u32, regs::rsp());
+                let store = Inst::store(ty, from_reg, to_mem, /* infallible store */ None);
+                ctx.emit(store)
             }
         }
     }
@@ -1225,21 +1168,25 @@ impl ABICall for X64ABICall {
         &self,
         ctx: &mut C,
         idx: usize,
-        into_reg: Writable<Reg>,
+        to_reg: Writable<Reg>,
     ) {
         match &self.sig.rets[idx] {
-            &ABIArg::Reg(reg, ty, _) => ctx.emit(Inst::gen_move(into_reg, reg.to_reg(), ty)),
+            &ABIArg::Reg(reg, ty, _) => ctx.emit(Inst::gen_move(to_reg, reg.to_reg(), ty)),
             &ABIArg::Stack(off, ty, _) => {
                 let ret_area_base = self.sig.stack_arg_space;
                 let sp_offset = off + ret_area_base;
                 // TODO handle offsets bigger than u32::max
                 debug_assert!(sp_offset >= 0);
                 debug_assert!(sp_offset <= u32::max_value() as i64);
-                ctx.emit(load_stack(
-                    Amode::imm_reg(sp_offset as u32, regs::rsp()),
-                    into_reg,
+                let from_addr = Amode::imm_reg(sp_offset as u32, regs::rsp());
+                let load = Inst::load(
                     ty,
-                ));
+                    from_addr,
+                    to_reg,
+                    ExtKind::ZeroExtend,
+                    /* infallible load */ None,
+                );
+                ctx.emit(load);
             }
         }
     }
diff --git a/cranelift/codegen/src/isa/x64/inst/args.rs b/cranelift/codegen/src/isa/x64/inst/args.rs
index 6b13b5283e..343f3322d0 100644
--- a/cranelift/codegen/src/isa/x64/inst/args.rs
+++ b/cranelift/codegen/src/isa/x64/inst/args.rs
@@ -634,6 +634,16 @@ impl fmt::Display for SseOpcode {
     }
 }
 
+/// This defines the ways a value can be extended: either signed- or zero-extension, or none for
+/// types that are not extended. Contrast with [ExtMode], which defines the widths from and to
+/// which values can be extended.
+#[derive(Clone, PartialEq)]
+pub enum ExtKind {
+    None,
+    SignExtend,
+    ZeroExtend,
+}
+
 /// These indicate ways of extending (widening) a value, using the Intel
 /// naming: B(yte) = u8, W(ord) = u16, L(ong)word = u32, Q(uad)word = u64
 #[derive(Clone, PartialEq)]
diff --git a/cranelift/codegen/src/isa/x64/inst/emit.rs b/cranelift/codegen/src/isa/x64/inst/emit.rs
index f2633bb1db..9bae562c5c 100644
--- a/cranelift/codegen/src/isa/x64/inst/emit.rs
+++ b/cranelift/codegen/src/isa/x64/inst/emit.rs
@@ -1,6 +1,6 @@
 use crate::binemit::Reloc;
 use crate::ir::immediates::{Ieee32, Ieee64};
-use crate::ir::{types, TrapCode};
+use crate::ir::TrapCode;
 use crate::isa::x64::inst::args::*;
 use crate::isa::x64::inst::*;
 use crate::machinst::{MachBuffer, MachInstEmit, MachLabel};
@@ -1807,17 +1807,9 @@ pub(crate) fn emit(
                 // "constant inline" code should be replaced by constant pool integration.
 
                 // Load the inline constant.
-                let opcode = match *ty {
-                    types::F32X4 => SseOpcode::Movups,
-                    types::F64X2 => SseOpcode::Movupd,
-                    types::I8X16 => SseOpcode::Movupd, // TODO replace with MOVDQU
-                    _ => unimplemented!("cannot yet load constants for type: {}", ty),
-                };
                 let constant_start_label = sink.get_label();
-                let load_offset = RegMem::mem(Amode::rip_relative(BranchTarget::Label(
-                    constant_start_label,
-                )));
-                let load = Inst::xmm_unary_rm_r(opcode, load_offset, *dst);
+                let load_offset = Amode::rip_relative(BranchTarget::Label(constant_start_label));
+                let load = Inst::load(*ty, load_offset, *dst, ExtKind::None, None);
                 load.emit(sink, flags, state);
 
                 // Jump over the constant.
diff --git a/cranelift/codegen/src/isa/x64/inst/mod.rs b/cranelift/codegen/src/isa/x64/inst/mod.rs
index 145831c01e..712a9b508e 100644
--- a/cranelift/codegen/src/isa/x64/inst/mod.rs
+++ b/cranelift/codegen/src/isa/x64/inst/mod.rs
@@ -526,6 +526,7 @@ impl Inst {
         Inst::Mov_R_R { is_64, src, dst }
     }
 
+    // TODO Can be replaced by `Inst::move` (high-level) and `Inst::unary_rm_r` (low-level)
     pub(crate) fn xmm_mov(
         op: SseOpcode,
         src: RegMem,
@@ -935,6 +936,85 @@ impl Inst {
             srcloc,
         }
     }
+
+    /// Choose which instruction to use for loading a register value from memory. For loads smaller
+    /// than 64 bits, this method expects a way to extend the value (i.e. [ExtKind::SignExtend],
+    /// [ExtKind::ZeroExtend]); loads with no extension necessary will ignore this.
+    pub(crate) fn load(
+        ty: Type,
+        from_addr: impl Into<SyntheticAmode>,
+        to_reg: Writable<Reg>,
+        ext_kind: ExtKind,
+        srcloc: Option<SourceLoc>,
+    ) -> Inst {
+        let rc = to_reg.to_reg().get_class();
+        match rc {
+            RegClass::I64 => {
+                let ext_mode = match ty.bytes() {
+                    1 => Some(ExtMode::BQ),
+                    2 => Some(ExtMode::WQ),
+                    4 => Some(ExtMode::LQ),
+                    8 => None,
+                    _ => unreachable!("the type should never use a scalar load: {}", ty),
+                };
+                if let Some(ext_mode) = ext_mode {
+                    // Values smaller than 64 bits must be extended in some way.
+                    match ext_kind {
+                        ExtKind::SignExtend => {
+                            Inst::movsx_rm_r(ext_mode, RegMem::mem(from_addr), to_reg, srcloc)
+                        }
+                        ExtKind::ZeroExtend => {
+                            Inst::movzx_rm_r(ext_mode, RegMem::mem(from_addr), to_reg, srcloc)
+                        }
+                        ExtKind::None => panic!(
+                            "expected an extension kind for extension mode: {:?}",
+                            ext_mode
+                        ),
+                    }
+                } else {
+                    // 64-bit values can be moved directly.
+                    Inst::mov64_m_r(from_addr, to_reg, srcloc)
+                }
+            }
+            RegClass::V128 => {
+                let opcode = match ty {
+                    types::F32 => SseOpcode::Movss,
+                    types::F64 => SseOpcode::Movsd,
+                    types::F32X4 => SseOpcode::Movups,
+                    types::F64X2 => SseOpcode::Movupd,
+                    _ if ty.is_vector() && ty.bits() == 128 => SseOpcode::Movdqu,
+                    _ => unimplemented!("unable to load type: {}", ty),
+                };
+                Inst::xmm_unary_rm_r(opcode, RegMem::mem(from_addr), to_reg)
+            }
+            _ => panic!("unable to generate load for register class: {:?}", rc),
+        }
+    }
+
+    /// Choose which instruction to use for storing a register value to memory.
+    pub(crate) fn store(
+        ty: Type,
+        from_reg: Reg,
+        to_addr: impl Into<SyntheticAmode>,
+        srcloc: Option<SourceLoc>,
+    ) -> Inst {
+        let rc = from_reg.get_class();
+        match rc {
+            RegClass::I64 => Inst::mov_r_m(ty.bytes() as u8, from_reg, to_addr, srcloc),
+            RegClass::V128 => {
+                let opcode = match ty {
+                    types::F32 => SseOpcode::Movss,
+                    types::F64 => SseOpcode::Movsd,
+                    types::F32X4 => SseOpcode::Movups,
+                    types::F64X2 => SseOpcode::Movupd,
+                    _ if ty.is_vector() && ty.bits() == 128 => SseOpcode::Movdqu,
+                    _ => unimplemented!("unable to store type: {}", ty),
+                };
+                Inst::xmm_mov_r_m(opcode, from_reg, to_addr, srcloc)
+            }
+            _ => panic!("unable to generate store for register class: {:?}", rc),
+        }
+    }
 }
 
 // Inst helpers.
@@ -2093,16 +2173,18 @@ impl MachInst for Inst {
         debug_assert!(rc_dst == rc_src);
         match rc_dst {
             RegClass::I64 => Inst::mov_r_r(true, src_reg, dst_reg),
-            RegClass::V128 => match ty {
-                types::F32 => Inst::xmm_mov(SseOpcode::Movss, RegMem::reg(src_reg), dst_reg, None),
-                types::F64 => Inst::xmm_mov(SseOpcode::Movsd, RegMem::reg(src_reg), dst_reg, None),
-                _ if ty.is_vector() && ty.bits() == 128 => {
-                    // TODO Specialize this move for different types: MOVUPD, MOVDQU, etc.
-                    Inst::xmm_mov(SseOpcode::Movups, RegMem::reg(src_reg), dst_reg, None)
-                }
-                _ => panic!("unexpected type {:?} in gen_move of regclass V128", ty),
-            },
-            _ => panic!("gen_move(x64): unhandled regclass"),
+            RegClass::V128 => {
+                let opcode = match ty {
+                    types::F32 => SseOpcode::Movss,
+                    types::F64 => SseOpcode::Movsd,
+                    types::F32X4 => SseOpcode::Movaps,
+                    types::F64X2 => SseOpcode::Movapd,
+                    _ if ty.is_vector() && ty.bits() == 128 => SseOpcode::Movdqa,
+                    _ => unimplemented!("unable to move type: {}", ty),
+                };
+                Inst::xmm_unary_rm_r(opcode, RegMem::reg(src_reg), dst_reg)
+            }
+            _ => panic!("gen_move(x64): unhandled regclass {:?}", rc_dst),
         }
     }
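
Reviewer note — usage sketch, not part of the patch: the two new constructors centralize
the per-type opcode selection that `load_stack`/`store_stack` previously duplicated at each
call site. The snippet below illustrates how crate-internal code might pair them, assuming
only the signatures added above; `example_spill_reload` is a hypothetical helper name, not a
function in this diff.

    // Hypothetical illustration; assumes the crate-internal constructors from inst/mod.rs:
    //   Inst::load(ty, from_addr, to_reg, ext_kind, srcloc)
    //   Inst::store(ty, from_reg, to_addr, srcloc)
    fn example_spill_reload(ty: Type, reg: Writable<Reg>, sp_off: u32) -> (Inst, Inst) {
        // Store the register to a nominal-SP slot; the store is infallible, so no srcloc.
        let store = Inst::store(
            ty,
            reg.to_reg(),
            SyntheticAmode::nominal_sp_offset(sp_off),
            /* infallible store */ None,
        );
        // Reload it, zero-extending values narrower than 64 bits, matching how
        // load_spillslot above reloads spill slots.
        let load = Inst::load(
            ty,
            SyntheticAmode::nominal_sp_offset(sp_off),
            reg,
            ExtKind::ZeroExtend,
            /* infallible load */ None,
        );
        (store, load)
    }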