machinst x64: support out-of-bounds memory accesses;

This commit is contained in:
Benjamin Bouvier
2020-07-08 15:22:00 +02:00
parent ea33ce9116
commit 5a55646fc3
6 changed files with 598 additions and 323 deletions

View File

@@ -364,6 +364,7 @@ impl ABIBody for X64ABIBody {
ext_mode,
RegMem::reg(from_reg.to_reg()),
dest_reg,
/* infallible load */ None,
));
}
(ArgumentExtension::Sext, Some(ext_mode)) => {
@@ -371,6 +372,7 @@ impl ABIBody for X64ABIBody {
ext_mode,
RegMem::reg(from_reg.to_reg()),
dest_reg,
/* infallible load */ None,
));
}
_ => ret.push(Inst::gen_move(dest_reg, from_reg.to_reg(), ty)),
@@ -394,6 +396,7 @@ impl ABIBody for X64ABIBody {
ext_mode,
RegMem::reg(from_reg.to_reg()),
from_reg,
/* infallible load */ None,
));
}
(ArgumentExtension::Sext, Some(ext_mode)) => {
@@ -401,6 +404,7 @@ impl ABIBody for X64ABIBody {
ext_mode,
RegMem::reg(from_reg.to_reg()),
from_reg,
/* infallible load */ None,
));
}
_ => {}
@@ -965,8 +969,13 @@ fn load_stack(mem: impl Into<SyntheticAmode>, into_reg: Writable<Reg>, ty: Type)
let mem = mem.into();
match ext_mode {
Some(ext_mode) => Inst::movsx_rm_r(ext_mode, RegMem::mem(mem), into_reg),
None => Inst::mov64_m_r(mem, into_reg),
Some(ext_mode) => Inst::movsx_rm_r(
ext_mode,
RegMem::mem(mem),
into_reg,
/* infallible load */ None,
),
None => Inst::mov64_m_r(mem, into_reg, None /* infallible */),
}
}
@@ -982,7 +991,7 @@ fn store_stack(mem: impl Into<SyntheticAmode>, from_reg: Reg, ty: Type) -> Inst
};
let mem = mem.into();
if is_int {
Inst::mov_r_m(size, from_reg, mem)
Inst::mov_r_m(size, from_reg, mem, /* infallible store */ None)
} else {
unimplemented!("f32/f64 store_stack");
}

View File

@@ -234,12 +234,11 @@ impl RegMem {
pub(crate) fn mem(addr: impl Into<SyntheticAmode>) -> Self {
Self::Mem { addr: addr.into() }
}
/// Add the regs mentioned by `self` to `collector`.
pub(crate) fn get_regs_as_uses(&self, collector: &mut RegUsageCollector) {
match self {
RegMem::Reg { reg } => collector.add_use(*reg),
RegMem::Mem { addr } => addr.get_regs_as_uses(collector),
RegMem::Mem { addr, .. } => addr.get_regs_as_uses(collector),
}
}
}
@@ -252,7 +251,7 @@ impl ShowWithRRU for RegMem {
fn show_rru_sized(&self, mb_rru: Option<&RealRegUniverse>, size: u8) -> String {
match self {
RegMem::Reg { reg } => show_ireg_sized(*reg, mb_rru, size),
RegMem::Mem { addr } => addr.show_rru(mb_rru),
RegMem::Mem { addr, .. } => addr.show_rru(mb_rru),
}
}
}

View File

@@ -807,7 +807,12 @@ pub(crate) fn emit(
emit_std_reg_reg(sink, LegacyPrefix::None, 0x89, 1, *src, dst.to_reg(), rex);
}
Inst::MovZX_RM_R { ext_mode, src, dst } => {
Inst::MovZX_RM_R {
ext_mode,
src,
dst,
srcloc,
} => {
let (opcodes, num_opcodes, rex_flags) = match ext_mode {
ExtMode::BL => {
// MOVZBL is (REX.W==0) 0F B6 /r
@@ -849,27 +854,45 @@ pub(crate) fn emit(
*src,
rex_flags,
),
RegMem::Mem { addr: src } => emit_std_reg_mem(
sink,
LegacyPrefix::None,
opcodes,
num_opcodes,
dst.to_reg(),
&src.finalize(state),
rex_flags,
),
RegMem::Mem { addr: src } => {
let src = &src.finalize(state);
if let Some(srcloc) = *srcloc {
// Register the offset at which the actual load instruction starts.
sink.add_trap(srcloc, TrapCode::HeapOutOfBounds);
}
emit_std_reg_mem(
sink,
LegacyPrefix::None,
opcodes,
num_opcodes,
dst.to_reg(),
src,
rex_flags,
)
}
}
}
Inst::Mov64_M_R { src, dst } => emit_std_reg_mem(
sink,
LegacyPrefix::None,
0x8B,
1,
dst.to_reg(),
&src.finalize(state),
RexFlags::set_w(),
),
Inst::Mov64_M_R { src, dst, srcloc } => {
let src = &src.finalize(state);
if let Some(srcloc) = *srcloc {
// Register the offset at which the actual load instruction starts.
sink.add_trap(srcloc, TrapCode::HeapOutOfBounds);
}
emit_std_reg_mem(
sink,
LegacyPrefix::None,
0x8B,
1,
dst.to_reg(),
src,
RexFlags::set_w(),
)
}
Inst::LoadEffectiveAddress { addr, dst } => emit_std_reg_mem(
sink,
@@ -881,7 +904,12 @@ pub(crate) fn emit(
RexFlags::set_w(),
),
Inst::MovSX_RM_R { ext_mode, src, dst } => {
Inst::MovSX_RM_R {
ext_mode,
src,
dst,
srcloc,
} => {
let (opcodes, num_opcodes, rex_flags) = match ext_mode {
ExtMode::BL => {
// MOVSBL is (REX.W==0) 0F BE /r
@@ -915,21 +943,41 @@ pub(crate) fn emit(
*src,
rex_flags,
),
RegMem::Mem { addr: src } => emit_std_reg_mem(
sink,
LegacyPrefix::None,
opcodes,
num_opcodes,
dst.to_reg(),
&src.finalize(state),
rex_flags,
),
RegMem::Mem { addr: src } => {
let src = &src.finalize(state);
if let Some(srcloc) = *srcloc {
// Register the offset at which the actual load instruction starts.
sink.add_trap(srcloc, TrapCode::HeapOutOfBounds);
}
emit_std_reg_mem(
sink,
LegacyPrefix::None,
opcodes,
num_opcodes,
dst.to_reg(),
src,
rex_flags,
)
}
}
}
Inst::Mov_R_M { size, src, dst } => {
Inst::Mov_R_M {
size,
src,
dst,
srcloc,
} => {
let dst = &dst.finalize(state);
if let Some(srcloc) = *srcloc {
// Register the offset at which the actual store instruction starts.
sink.add_trap(srcloc, TrapCode::HeapOutOfBounds);
}
match size {
1 => {
// This is one of the few places where the presence of a
@@ -1350,7 +1398,7 @@ pub(crate) fn emit(
one_way_jmp(sink, CC::NB, *default_label); // idx unsigned >= jmp table size
// Copy the index (and make sure to clear the high 32-bits lane of tmp2).
let inst = Inst::movzx_rm_r(ExtMode::LQ, RegMem::reg(*idx), *tmp2);
let inst = Inst::movzx_rm_r(ExtMode::LQ, RegMem::reg(*idx), *tmp2, None);
inst.emit(sink, flags, state);
// Load base address of jump table.
@@ -1366,6 +1414,7 @@ pub(crate) fn emit(
ExtMode::LQ,
RegMem::mem(Amode::imm_reg_reg_shift(0, tmp1.to_reg(), tmp2.to_reg(), 2)),
*tmp2,
None,
);
inst.emit(sink, flags, state);
@@ -1418,6 +1467,7 @@ pub(crate) fn emit(
op,
src: src_e,
dst: reg_g,
srcloc,
} => {
let rex = RexFlags::clear_w();
let (prefix, opcode) = match op {
@@ -1432,9 +1482,12 @@ pub(crate) fn emit(
RegMem::Reg { reg: reg_e } => {
emit_std_reg_reg(sink, prefix, opcode, 2, reg_g.to_reg(), *reg_e, rex);
}
RegMem::Mem { addr } => {
let addr = &addr.finalize(state);
if let Some(srcloc) = *srcloc {
// Register the offset at which the actual load instruction starts.
sink.add_trap(srcloc, TrapCode::HeapOutOfBounds);
}
emit_std_reg_mem(sink, prefix, opcode, 2, reg_g.to_reg(), addr, rex);
}
}
@@ -1462,14 +1515,19 @@ pub(crate) fn emit(
RegMem::Reg { reg: reg_e } => {
emit_std_reg_reg(sink, prefix, opcode, 2, reg_g.to_reg(), *reg_e, rex);
}
RegMem::Mem { addr } => {
let addr = &addr.finalize(state);
emit_std_reg_mem(sink, prefix, opcode, 2, reg_g.to_reg(), addr, rex);
}
}
}
Inst::XMM_Mov_R_M { op, src, dst } => {
Inst::XMM_Mov_R_M {
op,
src,
dst,
srcloc,
} => {
let rex = RexFlags::clear_w();
let (prefix, opcode) = match op {
SseOpcode::Movd => (LegacyPrefix::_66, 0x0F7E),
@@ -1478,6 +1536,10 @@ pub(crate) fn emit(
};
let dst = &dst.finalize(state);
if let Some(srcloc) = *srcloc {
// Register the offset at which the actual store instruction starts.
sink.add_trap(srcloc, TrapCode::HeapOutOfBounds);
}
emit_std_reg_mem(sink, prefix, opcode, 2, *src, dst, rex);
}

File diff suppressed because it is too large Load Diff

View File

@@ -14,7 +14,7 @@ use regalloc::{RealRegUniverse, Reg, RegClass, RegUsageMapper, SpillSlot, Virtua
use smallvec::SmallVec;
use crate::binemit::CodeOffset;
use crate::ir::types::{B1, B128, B16, B32, B64, B8, F32, F64, I128, I16, I32, I64, I8};
use crate::ir::types::*;
use crate::ir::{ExternalName, Opcode, SourceLoc, TrapCode, Type};
use crate::machinst::*;
use crate::settings::Flags;
@@ -114,12 +114,16 @@ pub enum Inst {
ext_mode: ExtMode,
src: RegMem,
dst: Writable<Reg>,
/// Source location, if the memory access can be out-of-bounds.
srcloc: Option<SourceLoc>,
},
/// A plain 64-bit integer load, since MovZX_RM_R can't represent that.
Mov64_M_R {
src: SyntheticAmode,
dst: Writable<Reg>,
/// Source location, if the memory access can be out-of-bounds.
srcloc: Option<SourceLoc>,
},
/// Loads the memory address of addr into dst.
@@ -133,6 +137,8 @@ pub enum Inst {
ext_mode: ExtMode,
src: RegMem,
dst: Writable<Reg>,
/// Source location, if the memory access can be out-of-bounds.
srcloc: Option<SourceLoc>,
},
/// Integer stores: mov (b w l q) reg addr.
@@ -140,6 +146,8 @@ pub enum Inst {
size: u8, // 1, 2, 4 or 8.
src: Reg,
dst: SyntheticAmode,
/// Source location, if the memory access can be out-of-bounds.
srcloc: Option<SourceLoc>,
},
/// Arithmetic shifts: (shl shr sar) (l q) imm reg.
@@ -196,6 +204,8 @@ pub enum Inst {
op: SseOpcode,
src: RegMem,
dst: Writable<Reg>,
/// Source location, if the memory access can be out-of-bounds.
srcloc: Option<SourceLoc>,
},
/// mov reg addr (good for all memory stores from xmm registers)
@@ -203,6 +213,8 @@ pub enum Inst {
op: SseOpcode,
src: Reg,
dst: SyntheticAmode,
/// Source location, if the memory access can be out-of-bounds.
srcloc: Option<SourceLoc>,
},
// =====================================
@@ -367,9 +379,19 @@ impl Inst {
Inst::Mov_R_R { is_64, src, dst }
}
pub(crate) fn xmm_mov_rm_r(op: SseOpcode, src: RegMem, dst: Writable<Reg>) -> Inst {
pub(crate) fn xmm_mov_rm_r(
op: SseOpcode,
src: RegMem,
dst: Writable<Reg>,
srcloc: Option<SourceLoc>,
) -> Inst {
debug_assert!(dst.to_reg().get_class() == RegClass::V128);
Inst::XMM_Mov_RM_R { op, src, dst }
Inst::XMM_Mov_RM_R {
op,
src,
dst,
srcloc,
}
}
pub(crate) fn xmm_rm_r(op: SseOpcode, src: RegMem, dst: Writable<Reg>) -> Self {
@@ -377,38 +399,69 @@ impl Inst {
Inst::XMM_RM_R { op, src, dst }
}
pub(crate) fn xmm_mov_r_m(op: SseOpcode, src: Reg, dst: impl Into<SyntheticAmode>) -> Inst {
pub(crate) fn xmm_mov_r_m(
op: SseOpcode,
src: Reg,
dst: impl Into<SyntheticAmode>,
srcloc: Option<SourceLoc>,
) -> Inst {
debug_assert!(src.get_class() == RegClass::V128);
Inst::XMM_Mov_R_M {
op,
src,
dst: dst.into(),
srcloc,
}
}
pub(crate) fn movzx_rm_r(ext_mode: ExtMode, src: RegMem, dst: Writable<Reg>) -> Inst {
pub(crate) fn movzx_rm_r(
ext_mode: ExtMode,
src: RegMem,
dst: Writable<Reg>,
srcloc: Option<SourceLoc>,
) -> Inst {
debug_assert!(dst.to_reg().get_class() == RegClass::I64);
Inst::MovZX_RM_R { ext_mode, src, dst }
Inst::MovZX_RM_R {
ext_mode,
src,
dst,
srcloc,
}
}
pub(crate) fn movsx_rm_r(ext_mode: ExtMode, src: RegMem, dst: Writable<Reg>) -> Inst {
pub(crate) fn movsx_rm_r(
ext_mode: ExtMode,
src: RegMem,
dst: Writable<Reg>,
srcloc: Option<SourceLoc>,
) -> Inst {
debug_assert!(dst.to_reg().get_class() == RegClass::I64);
Inst::MovSX_RM_R { ext_mode, src, dst }
Inst::MovSX_RM_R {
ext_mode,
src,
dst,
srcloc,
}
}
pub(crate) fn mov64_m_r(src: impl Into<SyntheticAmode>, dst: Writable<Reg>) -> Inst {
pub(crate) fn mov64_m_r(
src: impl Into<SyntheticAmode>,
dst: Writable<Reg>,
srcloc: Option<SourceLoc>,
) -> Inst {
debug_assert!(dst.to_reg().get_class() == RegClass::I64);
Inst::Mov64_M_R {
src: src.into(),
dst,
srcloc,
}
}
/// A convenience function to be able to use a RegMem as the source of a move.
pub(crate) fn mov64_rm_r(src: RegMem, dst: Writable<Reg>) -> Inst {
pub(crate) fn mov64_rm_r(src: RegMem, dst: Writable<Reg>, srcloc: Option<SourceLoc>) -> Inst {
match src {
RegMem::Reg { reg } => Self::mov_r_r(true, reg, dst),
RegMem::Mem { addr } => Self::mov64_m_r(addr, dst),
RegMem::Mem { addr } => Self::mov64_m_r(addr, dst, srcloc),
}
}
@@ -416,6 +469,7 @@ impl Inst {
size: u8, // 1, 2, 4 or 8
src: Reg,
dst: impl Into<SyntheticAmode>,
srcloc: Option<SourceLoc>,
) -> Inst {
debug_assert!(size == 8 || size == 4 || size == 2 || size == 1);
debug_assert!(src.get_class() == RegClass::I64);
@@ -423,6 +477,7 @@ impl Inst {
size,
src,
dst: dst.into(),
srcloc,
}
}
@@ -663,13 +718,13 @@ impl ShowWithRRU for Inst {
_ => unreachable!(),
}
.into(),
Inst::XMM_Mov_RM_R { op, src, dst } => format!(
Inst::XMM_Mov_RM_R { op, src, dst, .. } => format!(
"{} {}, {}",
ljustify(op.to_string()),
src.show_rru_sized(mb_rru, op.src_size()),
show_ireg_sized(dst.to_reg(), mb_rru, 8),
),
Inst::XMM_Mov_R_M { op, src, dst } => format!(
Inst::XMM_Mov_R_M { op, src, dst, .. } => format!(
"{} {}, {}",
ljustify(op.to_string()),
show_ireg_sized(*src, mb_rru, 8),
@@ -708,7 +763,9 @@ impl ShowWithRRU for Inst {
show_ireg_sized(*src, mb_rru, sizeLQ(*is_64)),
show_ireg_sized(dst.to_reg(), mb_rru, sizeLQ(*is_64))
),
Inst::MovZX_RM_R { ext_mode, src, dst } => {
Inst::MovZX_RM_R {
ext_mode, src, dst, ..
} => {
if *ext_mode == ExtMode::LQ {
format!(
"{} {}, {}",
@@ -725,7 +782,7 @@ impl ShowWithRRU for Inst {
)
}
}
Inst::Mov64_M_R { src, dst } => format!(
Inst::Mov64_M_R { src, dst, .. } => format!(
"{} {}, {}",
ljustify("movq".to_string()),
src.show_rru(mb_rru),
@@ -737,13 +794,15 @@ impl ShowWithRRU for Inst {
addr.show_rru(mb_rru),
dst.show_rru(mb_rru)
),
Inst::MovSX_RM_R { ext_mode, src, dst } => format!(
Inst::MovSX_RM_R {
ext_mode, src, dst, ..
} => format!(
"{} {}, {}",
ljustify2("movs".to_string(), ext_mode.to_string()),
src.show_rru_sized(mb_rru, ext_mode.src_size()),
show_ireg_sized(dst.to_reg(), mb_rru, ext_mode.dst_size())
),
Inst::Mov_R_M { size, src, dst } => format!(
Inst::Mov_R_M { size, src, dst, .. } => format!(
"{} {}, {}",
ljustify2("mov".to_string(), suffixBWLQ(*size)),
show_ireg_sized(*src, mb_rru, *size),
@@ -906,7 +965,7 @@ fn x64_get_regs(inst: &Inst, collector: &mut RegUsageCollector) {
src.get_regs_as_uses(collector);
collector.add_def(*dst);
}
Inst::Mov64_M_R { src, dst } | Inst::LoadEffectiveAddress { addr: src, dst } => {
Inst::Mov64_M_R { src, dst, .. } | Inst::LoadEffectiveAddress { addr: src, dst } => {
src.get_regs_as_uses(collector);
collector.add_def(*dst)
}
@@ -1051,7 +1110,7 @@ impl RegMem {
fn map_uses<RUM: RegUsageMapper>(&mut self, map: &RUM) {
match self {
RegMem::Reg { ref mut reg } => map_use(map, reg),
RegMem::Mem { ref mut addr } => addr.map_uses(map),
RegMem::Mem { ref mut addr, .. } => addr.map_uses(map),
}
}
}
@@ -1123,7 +1182,7 @@ fn x64_map_regs<RUM: RegUsageMapper>(inst: &mut Inst, mapper: &RUM) {
src.map_uses(mapper);
map_def(mapper, dst);
}
Inst::Mov64_M_R { src, dst } | Inst::LoadEffectiveAddress { addr: src, dst } => {
Inst::Mov64_M_R { src, dst, .. } | Inst::LoadEffectiveAddress { addr: src, dst } => {
src.map_uses(mapper);
map_def(mapper, dst);
}
@@ -1243,8 +1302,10 @@ impl MachInst for Inst {
// conceivably use `movl %reg, %reg` to zero out the top 32 bits of
// %reg.
match self {
Self::Mov_R_R { is_64, src, dst } if *is_64 => Some((*dst, *src)),
Self::XMM_Mov_RM_R { op, src, dst }
Self::Mov_R_R {
is_64, src, dst, ..
} if *is_64 => Some((*dst, *src)),
Self::XMM_Mov_RM_R { op, src, dst, .. }
if *op == SseOpcode::Movss
|| *op == SseOpcode::Movsd
|| *op == SseOpcode::Movaps =>
@@ -1292,8 +1353,8 @@ impl MachInst for Inst {
match rc_dst {
RegClass::I64 => Inst::mov_r_r(true, src_reg, dst_reg),
RegClass::V128 => match ty {
F32 => Inst::xmm_mov_rm_r(SseOpcode::Movss, RegMem::reg(src_reg), dst_reg),
F64 => Inst::xmm_mov_rm_r(SseOpcode::Movsd, RegMem::reg(src_reg), dst_reg),
F32 => Inst::xmm_mov_rm_r(SseOpcode::Movss, RegMem::reg(src_reg), dst_reg, None),
F64 => Inst::xmm_mov_rm_r(SseOpcode::Movsd, RegMem::reg(src_reg), dst_reg, None),
_ => panic!("unexpected V128 type in gen_move"),
},
_ => panic!("gen_move(x64): unhandled regclass"),
@@ -1316,6 +1377,7 @@ impl MachInst for Inst {
match ty {
I8 | I16 | I32 | I64 | B1 | B8 | B16 | B32 | B64 => Ok(RegClass::I64),
F32 | F64 | I128 | B128 => Ok(RegClass::V128),
IFLAGS | FFLAGS => Ok(RegClass::I64),
_ => Err(CodegenError::Unsupported(format!(
"Unexpected SSA-value type: {}",
ty

View File

@@ -157,10 +157,14 @@ fn extend_input_to_reg(ctx: Ctx, spec: InsnInput, ext_spec: ExtSpec) -> Reg {
let dst = ctx.alloc_tmp(RegClass::I64, requested_ty);
match ext_spec {
ExtSpec::ZeroExtend32 | ExtSpec::ZeroExtend64 => {
ctx.emit(Inst::movzx_rm_r(ext_mode, src, dst))
ctx.emit(Inst::movzx_rm_r(
ext_mode, src, dst, /* infallible */ None,
))
}
ExtSpec::SignExtend32 | ExtSpec::SignExtend64 => {
ctx.emit(Inst::movsx_rm_r(ext_mode, src, dst))
ctx.emit(Inst::movsx_rm_r(
ext_mode, src, dst, /* infallible */ None,
))
}
}
dst.to_reg()
@@ -345,11 +349,7 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
tmp,
));
ctx.emit(Inst::imm_r(
ty == I64,
ty.bits() as u64 - 1,
dst,
));
ctx.emit(Inst::imm_r(ty == I64, ty.bits() as u64 - 1, dst));
ctx.emit(Inst::alu_rmi_r(
ty == I64,
@@ -415,7 +415,7 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
let cst = ctx.alloc_tmp(RegClass::I64, I64);
// mov src, tmp1
ctx.emit(Inst::mov64_rm_r(src.clone(), tmp1));
ctx.emit(Inst::mov64_rm_r(src.clone(), tmp1, None));
// shr $1, tmp1
ctx.emit(Inst::shift_r(is_64, ShiftKind::RightZ, Some(1), tmp1));
@@ -432,7 +432,7 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
));
// mov src, tmp2
ctx.emit(Inst::mov64_rm_r(src, tmp2));
ctx.emit(Inst::mov64_rm_r(src, tmp2, None));
// sub tmp1, tmp2
ctx.emit(Inst::alu_rmi_r(
@@ -481,7 +481,7 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
));
// mov tmp2, dst
ctx.emit(Inst::mov64_rm_r(RegMem::reg(tmp2.to_reg()), dst));
ctx.emit(Inst::mov64_rm_r(RegMem::reg(tmp2.to_reg()), dst, None));
// shr $4, dst
ctx.emit(Inst::shift_r(is_64, ShiftKind::RightZ, Some(4), dst));
@@ -526,7 +526,7 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
let tmp2 = ctx.alloc_tmp(RegClass::I64, I64);
// mov src, tmp1
ctx.emit(Inst::mov64_rm_r(src.clone(), tmp1));
ctx.emit(Inst::mov64_rm_r(src.clone(), tmp1, None));
// shr $1, tmp1
ctx.emit(Inst::shift_r(is_64, ShiftKind::RightZ, Some(1), tmp1));
@@ -540,7 +540,7 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
));
// mov src, tmp2
ctx.emit(Inst::mov64_rm_r(src, tmp2));
ctx.emit(Inst::mov64_rm_r(src, tmp2, None));
// sub tmp1, tmp2
ctx.emit(Inst::alu_rmi_r(
@@ -589,7 +589,7 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
));
// mov tmp2, dst
ctx.emit(Inst::mov64_rm_r(RegMem::reg(tmp2.to_reg()), dst));
ctx.emit(Inst::mov64_rm_r(RegMem::reg(tmp2.to_reg()), dst, None));
// shr $4, dst
ctx.emit(Inst::shift_r(is_64, ShiftKind::RightZ, Some(4), dst));
@@ -662,12 +662,16 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
if let Some(ext_mode) = ext_mode {
if op == Opcode::Sextend {
ctx.emit(Inst::movsx_rm_r(ext_mode, src, dst));
ctx.emit(Inst::movsx_rm_r(
ext_mode, src, dst, /* infallible */ None,
));
} else {
ctx.emit(Inst::movzx_rm_r(ext_mode, src, dst));
ctx.emit(Inst::movzx_rm_r(
ext_mode, src, dst, /* infallible */ None,
));
}
} else {
ctx.emit(Inst::mov64_rm_r(src, dst));
ctx.emit(Inst::mov64_rm_r(src, dst, /* infallible */ None));
}
}
@@ -752,7 +756,12 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
// TODO Fmax, Fmin.
_ => unimplemented!(),
};
ctx.emit(Inst::xmm_mov_rm_r(SseOpcode::Movss, RegMem::reg(lhs), dst));
ctx.emit(Inst::xmm_mov_rm_r(
SseOpcode::Movss,
RegMem::reg(lhs),
dst,
None,
));
ctx.emit(Inst::xmm_rm_r(sse_op, RegMem::reg(rhs), dst));
} else {
unimplemented!("unimplemented lowering for opcode {:?}", op);
@@ -779,17 +788,20 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
SseOpcode::Movd,
RegMem::reg(tmp_gpr1.to_reg()),
tmp_xmm1,
None,
));
ctx.emit(Inst::xmm_mov_rm_r(
SseOpcode::Movaps,
RegMem::reg(tmp_xmm1.to_reg()),
dst,
None,
));
ctx.emit(Inst::xmm_rm_r(SseOpcode::Andnps, RegMem::reg(lhs), dst));
ctx.emit(Inst::xmm_mov_rm_r(
SseOpcode::Movss,
RegMem::reg(rhs),
tmp_xmm2,
None,
));
ctx.emit(Inst::xmm_rm_r(
SseOpcode::Andps,
@@ -890,25 +902,37 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
_ => unreachable!(),
};
let srcloc = Some(ctx.srcloc(insn));
let dst = output_to_reg(ctx, outputs[0]);
match (sign_extend, is_float) {
(true, false) => {
// The load is sign-extended only when the output size is lower than 64 bits,
// so ext-mode is defined in this case.
ctx.emit(Inst::movsx_rm_r(ext_mode.unwrap(), RegMem::mem(addr), dst));
ctx.emit(Inst::movsx_rm_r(
ext_mode.unwrap(),
RegMem::mem(addr),
dst,
srcloc,
));
}
(false, false) => {
if elem_ty.bytes() == 8 {
// Use a plain load.
ctx.emit(Inst::mov64_m_r(addr, dst))
ctx.emit(Inst::mov64_m_r(addr, dst, srcloc))
} else {
// Use a zero-extended load.
ctx.emit(Inst::movzx_rm_r(ext_mode.unwrap(), RegMem::mem(addr), dst))
ctx.emit(Inst::movzx_rm_r(
ext_mode.unwrap(),
RegMem::mem(addr),
dst,
srcloc,
))
}
}
(_, true) => {
ctx.emit(match elem_ty {
F32 => Inst::xmm_mov_rm_r(SseOpcode::Movss, RegMem::mem(addr), dst),
F32 => Inst::xmm_mov_rm_r(SseOpcode::Movss, RegMem::mem(addr), dst, srcloc),
_ => unimplemented!("FP load not 32-bit"),
});
}
@@ -964,13 +988,15 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
let src = input_to_reg(ctx, inputs[0]);
let srcloc = Some(ctx.srcloc(insn));
if is_float {
ctx.emit(match elem_ty {
F32 => Inst::xmm_mov_r_m(SseOpcode::Movss, src, addr),
F32 => Inst::xmm_mov_r_m(SseOpcode::Movss, src, addr, srcloc),
_ => unimplemented!("FP store not 32-bit"),
});
} else {
ctx.emit(Inst::mov_r_m(elem_ty.bytes() as u8, src, addr));
ctx.emit(Inst::mov_r_m(elem_ty.bytes() as u8, src, addr, srcloc));
}
}
@@ -1049,8 +1075,8 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
if size == 1 {
// Sign-extend operands to 32, then do a cmove of size 4.
let lhs_se = ctx.alloc_tmp(RegClass::I64, I32);
ctx.emit(Inst::movsx_rm_r(ExtMode::BL, lhs, lhs_se));
ctx.emit(Inst::movsx_rm_r(ExtMode::BL, RegMem::reg(rhs), dst));
ctx.emit(Inst::movsx_rm_r(ExtMode::BL, lhs, lhs_se, None));
ctx.emit(Inst::movsx_rm_r(ExtMode::BL, RegMem::reg(rhs), dst, None));
ctx.emit(Inst::cmove(4, cc, RegMem::reg(lhs_se.to_reg()), dst));
} else {
ctx.emit(Inst::gen_move(dst, rhs, ty));