[machinst x64]: add source locations to more instruction formats

In order to register traps for `load_splat`, several instruction formats need knowledge of `SourceLoc`s; however, since the x64 backend does not correctly and completely register traps for `RegMem::Mem` variants I opened https://github.com/bytecodealliance/wasmtime/issues/2290 to discuss and resolve this issue. In the meantime, the current behavior (i.e. remaining largely unaware of `SourceLoc`s) is retained.
This commit is contained in:
Andrew Brown
2020-10-13 10:02:12 -07:00
parent e0b911a4df
commit d990dd4c9a
4 changed files with 303 additions and 145 deletions

View File

@@ -1728,6 +1728,7 @@ pub(crate) fn emit(
op,
src: src_e,
dst: reg_g,
srcloc,
} => {
let rex = RexFlags::clear_w();
let (prefix, opcode, length) = match op {
@@ -1819,6 +1820,10 @@ pub(crate) fn emit(
emit_std_reg_reg(sink, prefix, opcode, length, reg_g.to_reg(), *reg_e, rex);
}
RegMem::Mem { addr } => {
if let Some(srcloc) = *srcloc {
// Register the offset at which the actual load instruction starts.
sink.add_trap(srcloc, TrapCode::HeapOutOfBounds);
}
let addr = &addr.finalize(state);
emit_std_reg_mem(sink, prefix, opcode, length, reg_g.to_reg(), addr, rex);
}
@@ -1889,7 +1894,7 @@ pub(crate) fn emit(
// and negative zero. These instructions merge the sign bits in that
// case, and are no-ops otherwise.
let op = if *is_min { or_op } else { and_op };
let inst = Inst::xmm_rm_r(op, RegMem::reg(*lhs), *rhs_dst);
let inst = Inst::xmm_rm_r(op, RegMem::reg(*lhs), *rhs_dst, None);
inst.emit(sink, info, state);
let inst = Inst::jmp_known(done);
@@ -1899,13 +1904,13 @@ pub(crate) fn emit(
// read-only operand: perform an addition between the two operands, which has the
// desired NaN propagation effects.
sink.bind_label(propagate_nan);
let inst = Inst::xmm_rm_r(add_op, RegMem::reg(*lhs), *rhs_dst);
let inst = Inst::xmm_rm_r(add_op, RegMem::reg(*lhs), *rhs_dst, None);
inst.emit(sink, info, state);
one_way_jmp(sink, CC::P, done);
sink.bind_label(do_min_max);
let inst = Inst::xmm_rm_r(min_max_op, RegMem::reg(*lhs), *rhs_dst);
let inst = Inst::xmm_rm_r(min_max_op, RegMem::reg(*lhs), *rhs_dst, None);
inst.emit(sink, info, state);
sink.bind_label(done);
@@ -1916,7 +1921,8 @@ pub(crate) fn emit(
src,
dst,
imm,
is64: w,
is64,
srcloc,
} => {
let (prefix, opcode, len) = match op {
SseOpcode::Cmpps => (LegacyPrefixes::None, 0x0FC2, 2),
@@ -1933,7 +1939,7 @@ pub(crate) fn emit(
SseOpcode::Pshufd => (LegacyPrefixes::_66, 0x0F70, 2),
_ => unimplemented!("Opcode {:?} not implemented", op),
};
let rex = if *w {
let rex = if *is64 {
RexFlags::set_w()
} else {
RexFlags::clear_w()
@@ -1955,6 +1961,10 @@ pub(crate) fn emit(
}
}
RegMem::Mem { addr } => {
if let Some(srcloc) = *srcloc {
// Register the offset at which the actual load instruction starts.
sink.add_trap(srcloc, TrapCode::HeapOutOfBounds);
}
let addr = &addr.finalize(state);
assert!(
!regs_swapped,
@@ -1963,7 +1973,7 @@ pub(crate) fn emit(
emit_std_reg_mem(sink, prefix, opcode, len, dst.to_reg(), addr, rex);
}
}
sink.put1(*imm)
sink.put1(*imm);
}
Inst::XmmLoadConstSeq { val, dst, ty } => {
@@ -2188,7 +2198,7 @@ pub(crate) fn emit(
} else {
SseOpcode::Addss
};
let inst = Inst::xmm_rm_r(add_op, RegMem::reg(dst.to_reg()), *dst);
let inst = Inst::xmm_rm_r(add_op, RegMem::reg(dst.to_reg()), *dst, None);
inst.emit(sink, info, state);
sink.bind_label(done);
@@ -2295,8 +2305,12 @@ pub(crate) fn emit(
// If the input was positive, saturate to INT_MAX.
// Zero out tmp_xmm.
let inst =
Inst::xmm_rm_r(SseOpcode::Xorpd, RegMem::reg(tmp_xmm.to_reg()), *tmp_xmm);
let inst = Inst::xmm_rm_r(
SseOpcode::Xorpd,
RegMem::reg(tmp_xmm.to_reg()),
*tmp_xmm,
None,
);
inst.emit(sink, info, state);
let inst = Inst::xmm_cmp_rm_r(cmp_op, RegMem::reg(src), tmp_xmm.to_reg());
@@ -2367,8 +2381,12 @@ pub(crate) fn emit(
sink.bind_label(check_positive);
// Zero out the tmp_xmm register.
let inst =
Inst::xmm_rm_r(SseOpcode::Xorpd, RegMem::reg(tmp_xmm.to_reg()), *tmp_xmm);
let inst = Inst::xmm_rm_r(
SseOpcode::Xorpd,
RegMem::reg(tmp_xmm.to_reg()),
*tmp_xmm,
None,
);
inst.emit(sink, info, state);
let inst = Inst::xmm_cmp_rm_r(cmp_op, RegMem::reg(src), tmp_xmm.to_reg());
@@ -2522,7 +2540,7 @@ pub(crate) fn emit(
sink.bind_label(handle_large);
let inst = Inst::xmm_rm_r(sub_op, RegMem::reg(tmp_xmm.to_reg()), *src);
let inst = Inst::xmm_rm_r(sub_op, RegMem::reg(tmp_xmm.to_reg()), *src, None);
inst.emit(sink, info, state);
let inst = Inst::xmm_to_gpr(trunc_op, src.to_reg(), *dst, *dst_size);

View File

@@ -2983,12 +2983,12 @@ fn test_x64_emit() {
// XMM_RM_R: float binary ops
insns.push((
Inst::xmm_rm_r(SseOpcode::Addss, RegMem::reg(xmm1), w_xmm0),
Inst::xmm_rm_r(SseOpcode::Addss, RegMem::reg(xmm1), w_xmm0, None),
"F30F58C1",
"addss %xmm1, %xmm0",
));
insns.push((
Inst::xmm_rm_r(SseOpcode::Addss, RegMem::reg(xmm11), w_xmm13),
Inst::xmm_rm_r(SseOpcode::Addss, RegMem::reg(xmm11), w_xmm13, None),
"F3450F58EB",
"addss %xmm11, %xmm13",
));
@@ -2997,23 +2997,24 @@ fn test_x64_emit() {
SseOpcode::Addss,
RegMem::mem(Amode::imm_reg_reg_shift(123, r10, rdx, 2)),
w_xmm0,
None,
),
"F3410F5844927B",
"addss 123(%r10,%rdx,4), %xmm0",
));
insns.push((
Inst::xmm_rm_r(SseOpcode::Addsd, RegMem::reg(xmm15), w_xmm4),
Inst::xmm_rm_r(SseOpcode::Addsd, RegMem::reg(xmm15), w_xmm4, None),
"F2410F58E7",
"addsd %xmm15, %xmm4",
));
insns.push((
Inst::xmm_rm_r(SseOpcode::Subss, RegMem::reg(xmm0), w_xmm1),
Inst::xmm_rm_r(SseOpcode::Subss, RegMem::reg(xmm0), w_xmm1, None),
"F30F5CC8",
"subss %xmm0, %xmm1",
));
insns.push((
Inst::xmm_rm_r(SseOpcode::Subss, RegMem::reg(xmm12), w_xmm1),
Inst::xmm_rm_r(SseOpcode::Subss, RegMem::reg(xmm12), w_xmm1, None),
"F3410F5CCC",
"subss %xmm12, %xmm1",
));
@@ -3022,57 +3023,58 @@ fn test_x64_emit() {
SseOpcode::Subss,
RegMem::mem(Amode::imm_reg_reg_shift(321, r10, rax, 3)),
w_xmm10,
None,
),
"F3450F5C94C241010000",
"subss 321(%r10,%rax,8), %xmm10",
));
insns.push((
Inst::xmm_rm_r(SseOpcode::Subsd, RegMem::reg(xmm5), w_xmm14),
Inst::xmm_rm_r(SseOpcode::Subsd, RegMem::reg(xmm5), w_xmm14, None),
"F2440F5CF5",
"subsd %xmm5, %xmm14",
));
insns.push((
Inst::xmm_rm_r(SseOpcode::Mulss, RegMem::reg(xmm5), w_xmm4),
Inst::xmm_rm_r(SseOpcode::Mulss, RegMem::reg(xmm5), w_xmm4, None),
"F30F59E5",
"mulss %xmm5, %xmm4",
));
insns.push((
Inst::xmm_rm_r(SseOpcode::Mulsd, RegMem::reg(xmm5), w_xmm4),
Inst::xmm_rm_r(SseOpcode::Mulsd, RegMem::reg(xmm5), w_xmm4, None),
"F20F59E5",
"mulsd %xmm5, %xmm4",
));
insns.push((
Inst::xmm_rm_r(SseOpcode::Divss, RegMem::reg(xmm8), w_xmm7),
Inst::xmm_rm_r(SseOpcode::Divss, RegMem::reg(xmm8), w_xmm7, None),
"F3410F5EF8",
"divss %xmm8, %xmm7",
));
insns.push((
Inst::xmm_rm_r(SseOpcode::Divsd, RegMem::reg(xmm5), w_xmm4),
Inst::xmm_rm_r(SseOpcode::Divsd, RegMem::reg(xmm5), w_xmm4, None),
"F20F5EE5",
"divsd %xmm5, %xmm4",
));
insns.push((
Inst::xmm_rm_r(SseOpcode::Andps, RegMem::reg(xmm3), w_xmm12),
Inst::xmm_rm_r(SseOpcode::Andps, RegMem::reg(xmm3), w_xmm12, None),
"440F54E3",
"andps %xmm3, %xmm12",
));
insns.push((
Inst::xmm_rm_r(SseOpcode::Andnps, RegMem::reg(xmm4), w_xmm11),
Inst::xmm_rm_r(SseOpcode::Andnps, RegMem::reg(xmm4), w_xmm11, None),
"440F55DC",
"andnps %xmm4, %xmm11",
));
insns.push((
Inst::xmm_rm_r(SseOpcode::Orps, RegMem::reg(xmm1), w_xmm15),
Inst::xmm_rm_r(SseOpcode::Orps, RegMem::reg(xmm1), w_xmm15, None),
"440F56F9",
"orps %xmm1, %xmm15",
));
insns.push((
Inst::xmm_rm_r(SseOpcode::Orps, RegMem::reg(xmm5), w_xmm4),
Inst::xmm_rm_r(SseOpcode::Orps, RegMem::reg(xmm5), w_xmm4, None),
"0F56E5",
"orps %xmm5, %xmm4",
));
@@ -3081,211 +3083,211 @@ fn test_x64_emit() {
// XMM_RM_R: Integer Packed
insns.push((
Inst::xmm_rm_r(SseOpcode::Paddb, RegMem::reg(xmm9), w_xmm5),
Inst::xmm_rm_r(SseOpcode::Paddb, RegMem::reg(xmm9), w_xmm5, None),
"66410FFCE9",
"paddb %xmm9, %xmm5",
));
insns.push((
Inst::xmm_rm_r(SseOpcode::Paddw, RegMem::reg(xmm7), w_xmm6),
Inst::xmm_rm_r(SseOpcode::Paddw, RegMem::reg(xmm7), w_xmm6, None),
"660FFDF7",
"paddw %xmm7, %xmm6",
));
insns.push((
Inst::xmm_rm_r(SseOpcode::Paddd, RegMem::reg(xmm12), w_xmm13),
Inst::xmm_rm_r(SseOpcode::Paddd, RegMem::reg(xmm12), w_xmm13, None),
"66450FFEEC",
"paddd %xmm12, %xmm13",
));
insns.push((
Inst::xmm_rm_r(SseOpcode::Paddq, RegMem::reg(xmm1), w_xmm8),
Inst::xmm_rm_r(SseOpcode::Paddq, RegMem::reg(xmm1), w_xmm8, None),
"66440FD4C1",
"paddq %xmm1, %xmm8",
));
insns.push((
Inst::xmm_rm_r(SseOpcode::Paddsb, RegMem::reg(xmm9), w_xmm5),
Inst::xmm_rm_r(SseOpcode::Paddsb, RegMem::reg(xmm9), w_xmm5, None),
"66410FECE9",
"paddsb %xmm9, %xmm5",
));
insns.push((
Inst::xmm_rm_r(SseOpcode::Paddsw, RegMem::reg(xmm7), w_xmm6),
Inst::xmm_rm_r(SseOpcode::Paddsw, RegMem::reg(xmm7), w_xmm6, None),
"660FEDF7",
"paddsw %xmm7, %xmm6",
));
insns.push((
Inst::xmm_rm_r(SseOpcode::Paddusb, RegMem::reg(xmm12), w_xmm13),
Inst::xmm_rm_r(SseOpcode::Paddusb, RegMem::reg(xmm12), w_xmm13, None),
"66450FDCEC",
"paddusb %xmm12, %xmm13",
));
insns.push((
Inst::xmm_rm_r(SseOpcode::Paddusw, RegMem::reg(xmm1), w_xmm8),
Inst::xmm_rm_r(SseOpcode::Paddusw, RegMem::reg(xmm1), w_xmm8, None),
"66440FDDC1",
"paddusw %xmm1, %xmm8",
));
insns.push((
Inst::xmm_rm_r(SseOpcode::Psubsb, RegMem::reg(xmm9), w_xmm5),
Inst::xmm_rm_r(SseOpcode::Psubsb, RegMem::reg(xmm9), w_xmm5, None),
"66410FE8E9",
"psubsb %xmm9, %xmm5",
));
insns.push((
Inst::xmm_rm_r(SseOpcode::Psubsw, RegMem::reg(xmm7), w_xmm6),
Inst::xmm_rm_r(SseOpcode::Psubsw, RegMem::reg(xmm7), w_xmm6, None),
"660FE9F7",
"psubsw %xmm7, %xmm6",
));
insns.push((
Inst::xmm_rm_r(SseOpcode::Psubusb, RegMem::reg(xmm12), w_xmm13),
Inst::xmm_rm_r(SseOpcode::Psubusb, RegMem::reg(xmm12), w_xmm13, None),
"66450FD8EC",
"psubusb %xmm12, %xmm13",
));
insns.push((
Inst::xmm_rm_r(SseOpcode::Psubusw, RegMem::reg(xmm1), w_xmm8),
Inst::xmm_rm_r(SseOpcode::Psubusw, RegMem::reg(xmm1), w_xmm8, None),
"66440FD9C1",
"psubusw %xmm1, %xmm8",
));
insns.push((
Inst::xmm_rm_r(SseOpcode::Pavgb, RegMem::reg(xmm12), w_xmm13),
Inst::xmm_rm_r(SseOpcode::Pavgb, RegMem::reg(xmm12), w_xmm13, None),
"66450FE0EC",
"pavgb %xmm12, %xmm13",
));
insns.push((
Inst::xmm_rm_r(SseOpcode::Pavgw, RegMem::reg(xmm1), w_xmm8),
Inst::xmm_rm_r(SseOpcode::Pavgw, RegMem::reg(xmm1), w_xmm8, None),
"66440FE3C1",
"pavgw %xmm1, %xmm8",
));
insns.push((
Inst::xmm_rm_r(SseOpcode::Psubb, RegMem::reg(xmm5), w_xmm9),
Inst::xmm_rm_r(SseOpcode::Psubb, RegMem::reg(xmm5), w_xmm9, None),
"66440FF8CD",
"psubb %xmm5, %xmm9",
));
insns.push((
Inst::xmm_rm_r(SseOpcode::Psubw, RegMem::reg(xmm6), w_xmm7),
Inst::xmm_rm_r(SseOpcode::Psubw, RegMem::reg(xmm6), w_xmm7, None),
"660FF9FE",
"psubw %xmm6, %xmm7",
));
insns.push((
Inst::xmm_rm_r(SseOpcode::Psubd, RegMem::reg(xmm13), w_xmm12),
Inst::xmm_rm_r(SseOpcode::Psubd, RegMem::reg(xmm13), w_xmm12, None),
"66450FFAE5",
"psubd %xmm13, %xmm12",
));
insns.push((
Inst::xmm_rm_r(SseOpcode::Psubq, RegMem::reg(xmm8), w_xmm1),
Inst::xmm_rm_r(SseOpcode::Psubq, RegMem::reg(xmm8), w_xmm1, None),
"66410FFBC8",
"psubq %xmm8, %xmm1",
));
insns.push((
Inst::xmm_rm_r(SseOpcode::Pmulld, RegMem::reg(xmm15), w_xmm6),
Inst::xmm_rm_r(SseOpcode::Pmulld, RegMem::reg(xmm15), w_xmm6, None),
"66410F3840F7",
"pmulld %xmm15, %xmm6",
));
insns.push((
Inst::xmm_rm_r(SseOpcode::Pmullw, RegMem::reg(xmm14), w_xmm1),
Inst::xmm_rm_r(SseOpcode::Pmullw, RegMem::reg(xmm14), w_xmm1, None),
"66410FD5CE",
"pmullw %xmm14, %xmm1",
));
insns.push((
Inst::xmm_rm_r(SseOpcode::Pmuludq, RegMem::reg(xmm8), w_xmm9),
Inst::xmm_rm_r(SseOpcode::Pmuludq, RegMem::reg(xmm8), w_xmm9, None),
"66450FF4C8",
"pmuludq %xmm8, %xmm9",
));
insns.push((
Inst::xmm_rm_r(SseOpcode::Pmaxsb, RegMem::reg(xmm15), w_xmm6),
Inst::xmm_rm_r(SseOpcode::Pmaxsb, RegMem::reg(xmm15), w_xmm6, None),
"66410F383CF7",
"pmaxsb %xmm15, %xmm6",
));
insns.push((
Inst::xmm_rm_r(SseOpcode::Pmaxsw, RegMem::reg(xmm15), w_xmm6),
Inst::xmm_rm_r(SseOpcode::Pmaxsw, RegMem::reg(xmm15), w_xmm6, None),
"66410FEEF7",
"pmaxsw %xmm15, %xmm6",
));
insns.push((
Inst::xmm_rm_r(SseOpcode::Pmaxsd, RegMem::reg(xmm15), w_xmm6),
Inst::xmm_rm_r(SseOpcode::Pmaxsd, RegMem::reg(xmm15), w_xmm6, None),
"66410F383DF7",
"pmaxsd %xmm15, %xmm6",
));
insns.push((
Inst::xmm_rm_r(SseOpcode::Pmaxub, RegMem::reg(xmm14), w_xmm1),
Inst::xmm_rm_r(SseOpcode::Pmaxub, RegMem::reg(xmm14), w_xmm1, None),
"66410FDECE",
"pmaxub %xmm14, %xmm1",
));
insns.push((
Inst::xmm_rm_r(SseOpcode::Pmaxuw, RegMem::reg(xmm14), w_xmm1),
Inst::xmm_rm_r(SseOpcode::Pmaxuw, RegMem::reg(xmm14), w_xmm1, None),
"66410F383ECE",
"pmaxuw %xmm14, %xmm1",
));
insns.push((
Inst::xmm_rm_r(SseOpcode::Pmaxud, RegMem::reg(xmm14), w_xmm1),
Inst::xmm_rm_r(SseOpcode::Pmaxud, RegMem::reg(xmm14), w_xmm1, None),
"66410F383FCE",
"pmaxud %xmm14, %xmm1",
));
insns.push((
Inst::xmm_rm_r(SseOpcode::Pminsb, RegMem::reg(xmm8), w_xmm9),
Inst::xmm_rm_r(SseOpcode::Pminsb, RegMem::reg(xmm8), w_xmm9, None),
"66450F3838C8",
"pminsb %xmm8, %xmm9",
));
insns.push((
Inst::xmm_rm_r(SseOpcode::Pminsw, RegMem::reg(xmm8), w_xmm9),
Inst::xmm_rm_r(SseOpcode::Pminsw, RegMem::reg(xmm8), w_xmm9, None),
"66450FEAC8",
"pminsw %xmm8, %xmm9",
));
insns.push((
Inst::xmm_rm_r(SseOpcode::Pminsd, RegMem::reg(xmm8), w_xmm9),
Inst::xmm_rm_r(SseOpcode::Pminsd, RegMem::reg(xmm8), w_xmm9, None),
"66450F3839C8",
"pminsd %xmm8, %xmm9",
));
insns.push((
Inst::xmm_rm_r(SseOpcode::Pminub, RegMem::reg(xmm3), w_xmm2),
Inst::xmm_rm_r(SseOpcode::Pminub, RegMem::reg(xmm3), w_xmm2, None),
"660FDAD3",
"pminub %xmm3, %xmm2",
));
insns.push((
Inst::xmm_rm_r(SseOpcode::Pminuw, RegMem::reg(xmm3), w_xmm2),
Inst::xmm_rm_r(SseOpcode::Pminuw, RegMem::reg(xmm3), w_xmm2, None),
"660F383AD3",
"pminuw %xmm3, %xmm2",
));
insns.push((
Inst::xmm_rm_r(SseOpcode::Pminud, RegMem::reg(xmm3), w_xmm2),
Inst::xmm_rm_r(SseOpcode::Pminud, RegMem::reg(xmm3), w_xmm2, None),
"660F383BD3",
"pminud %xmm3, %xmm2",
));
insns.push((
Inst::xmm_rm_r(SseOpcode::Pxor, RegMem::reg(xmm11), w_xmm2),
Inst::xmm_rm_r(SseOpcode::Pxor, RegMem::reg(xmm11), w_xmm2, None),
"66410FEFD3",
"pxor %xmm11, %xmm2",
));
insns.push((
Inst::xmm_rm_r(SseOpcode::Pshufb, RegMem::reg(xmm11), w_xmm2),
Inst::xmm_rm_r(SseOpcode::Pshufb, RegMem::reg(xmm11), w_xmm2, None),
"66410F3800D3",
"pshufb %xmm11, %xmm2",
));
@@ -3488,12 +3490,12 @@ fn test_x64_emit() {
// ========================================================
// XmmRmRImm
insns.push((
Inst::xmm_rm_r_imm(SseOpcode::Cmppd, RegMem::reg(xmm5), w_xmm1, 2, false),
Inst::xmm_rm_r_imm(SseOpcode::Cmppd, RegMem::reg(xmm5), w_xmm1, 2, false, None),
"660FC2CD02",
"cmppd $2, %xmm5, %xmm1",
));
insns.push((
Inst::xmm_rm_r_imm(SseOpcode::Cmpps, RegMem::reg(xmm15), w_xmm7, 0, false),
Inst::xmm_rm_r_imm(SseOpcode::Cmpps, RegMem::reg(xmm15), w_xmm7, 0, false, None),
"410FC2FF00",
"cmpps $0, %xmm15, %xmm7",
));

View File

@@ -212,6 +212,7 @@ pub enum Inst {
op: SseOpcode,
src: RegMem,
dst: Writable<Reg>,
srcloc: Option<SourceLoc>,
},
/// XMM (scalar or vector) unary op: mov between XMM registers (32 64) (reg addr) reg, sqrt,
@@ -338,6 +339,7 @@ pub enum Inst {
dst: Writable<Reg>,
imm: u8,
is64: bool,
srcloc: Option<SourceLoc>,
},
// =====================================
@@ -711,10 +713,20 @@ impl Inst {
}
}
pub(crate) fn xmm_rm_r(op: SseOpcode, src: RegMem, dst: Writable<Reg>) -> Self {
pub(crate) fn xmm_rm_r(
op: SseOpcode,
src: RegMem,
dst: Writable<Reg>,
srcloc: Option<SourceLoc>,
) -> Self {
src.assert_regclass_is(RegClass::V128);
debug_assert!(dst.to_reg().get_class() == RegClass::V128);
Inst::XmmRmR { op, src, dst }
Inst::XmmRmR {
op,
src,
dst,
srcloc,
}
}
pub(crate) fn xmm_uninit_value(dst: Writable<Reg>) -> Self {
@@ -869,6 +881,7 @@ impl Inst {
dst: Writable<Reg>,
imm: u8,
is64: bool,
srcloc: Option<SourceLoc>,
) -> Inst {
Inst::XmmRmRImm {
op,
@@ -876,6 +889,7 @@ impl Inst {
dst,
imm,
is64,
srcloc,
}
}
@@ -1233,16 +1247,26 @@ impl Inst {
/// Choose which instruction to use for comparing two values for equality.
pub(crate) fn equals(ty: Type, from: RegMem, to: Writable<Reg>) -> Inst {
match ty {
types::I8X16 | types::B8X16 => Inst::xmm_rm_r(SseOpcode::Pcmpeqb, from, to),
types::I16X8 | types::B16X8 => Inst::xmm_rm_r(SseOpcode::Pcmpeqw, from, to),
types::I32X4 | types::B32X4 => Inst::xmm_rm_r(SseOpcode::Pcmpeqd, from, to),
types::I64X2 | types::B64X2 => Inst::xmm_rm_r(SseOpcode::Pcmpeqq, from, to),
types::F32X4 => {
Inst::xmm_rm_r_imm(SseOpcode::Cmpps, from, to, FcmpImm::Equal.encode(), false)
}
types::F64X2 => {
Inst::xmm_rm_r_imm(SseOpcode::Cmppd, from, to, FcmpImm::Equal.encode(), false)
}
types::I8X16 | types::B8X16 => Inst::xmm_rm_r(SseOpcode::Pcmpeqb, from, to, None),
types::I16X8 | types::B16X8 => Inst::xmm_rm_r(SseOpcode::Pcmpeqw, from, to, None),
types::I32X4 | types::B32X4 => Inst::xmm_rm_r(SseOpcode::Pcmpeqd, from, to, None),
types::I64X2 | types::B64X2 => Inst::xmm_rm_r(SseOpcode::Pcmpeqq, from, to, None),
types::F32X4 => Inst::xmm_rm_r_imm(
SseOpcode::Cmpps,
from,
to,
FcmpImm::Equal.encode(),
false,
None,
),
types::F64X2 => Inst::xmm_rm_r_imm(
SseOpcode::Cmppd,
from,
to,
FcmpImm::Equal.encode(),
false,
None,
),
_ => unimplemented!("unimplemented type for Inst::equals: {}", ty),
}
}
@@ -1250,9 +1274,11 @@ impl Inst {
/// Choose which instruction to use for computing a bitwise AND on two values.
pub(crate) fn and(ty: Type, from: RegMem, to: Writable<Reg>) -> Inst {
match ty {
types::F32X4 => Inst::xmm_rm_r(SseOpcode::Andps, from, to),
types::F64X2 => Inst::xmm_rm_r(SseOpcode::Andpd, from, to),
_ if ty.is_vector() && ty.bits() == 128 => Inst::xmm_rm_r(SseOpcode::Pand, from, to),
types::F32X4 => Inst::xmm_rm_r(SseOpcode::Andps, from, to, None),
types::F64X2 => Inst::xmm_rm_r(SseOpcode::Andpd, from, to, None),
_ if ty.is_vector() && ty.bits() == 128 => {
Inst::xmm_rm_r(SseOpcode::Pand, from, to, None)
}
_ => unimplemented!("unimplemented type for Inst::and: {}", ty),
}
}
@@ -1260,9 +1286,11 @@ impl Inst {
/// Choose which instruction to use for computing a bitwise AND NOT on two values.
pub(crate) fn and_not(ty: Type, from: RegMem, to: Writable<Reg>) -> Inst {
match ty {
types::F32X4 => Inst::xmm_rm_r(SseOpcode::Andnps, from, to),
types::F64X2 => Inst::xmm_rm_r(SseOpcode::Andnpd, from, to),
_ if ty.is_vector() && ty.bits() == 128 => Inst::xmm_rm_r(SseOpcode::Pandn, from, to),
types::F32X4 => Inst::xmm_rm_r(SseOpcode::Andnps, from, to, None),
types::F64X2 => Inst::xmm_rm_r(SseOpcode::Andnpd, from, to, None),
_ if ty.is_vector() && ty.bits() == 128 => {
Inst::xmm_rm_r(SseOpcode::Pandn, from, to, None)
}
_ => unimplemented!("unimplemented type for Inst::and_not: {}", ty),
}
}
@@ -1270,9 +1298,11 @@ impl Inst {
/// Choose which instruction to use for computing a bitwise OR on two values.
pub(crate) fn or(ty: Type, from: RegMem, to: Writable<Reg>) -> Inst {
match ty {
types::F32X4 => Inst::xmm_rm_r(SseOpcode::Orps, from, to),
types::F64X2 => Inst::xmm_rm_r(SseOpcode::Orpd, from, to),
_ if ty.is_vector() && ty.bits() == 128 => Inst::xmm_rm_r(SseOpcode::Por, from, to),
types::F32X4 => Inst::xmm_rm_r(SseOpcode::Orps, from, to, None),
types::F64X2 => Inst::xmm_rm_r(SseOpcode::Orpd, from, to, None),
_ if ty.is_vector() && ty.bits() == 128 => {
Inst::xmm_rm_r(SseOpcode::Por, from, to, None)
}
_ => unimplemented!("unimplemented type for Inst::or: {}", ty),
}
}
@@ -1280,9 +1310,11 @@ impl Inst {
/// Choose which instruction to use for computing a bitwise XOR on two values.
pub(crate) fn xor(ty: Type, from: RegMem, to: Writable<Reg>) -> Inst {
match ty {
types::F32X4 => Inst::xmm_rm_r(SseOpcode::Xorps, from, to),
types::F64X2 => Inst::xmm_rm_r(SseOpcode::Xorpd, from, to),
_ if ty.is_vector() && ty.bits() == 128 => Inst::xmm_rm_r(SseOpcode::Pxor, from, to),
types::F32X4 => Inst::xmm_rm_r(SseOpcode::Xorps, from, to, None),
types::F64X2 => Inst::xmm_rm_r(SseOpcode::Xorpd, from, to, None),
_ if ty.is_vector() && ty.bits() == 128 => {
Inst::xmm_rm_r(SseOpcode::Pxor, from, to, None)
}
_ => unimplemented!("unimplemented type for Inst::xor: {}", ty),
}
}
@@ -1429,7 +1461,7 @@ impl PrettyPrint for Inst {
dst.show_rru(mb_rru),
),
Inst::XmmRmR { op, src, dst } => format!(
Inst::XmmRmR { op, src, dst, .. } => format!(
"{} {}, {}",
ljustify(op.to_string()),
src.show_rru_sized(mb_rru, 8),
@@ -1459,7 +1491,7 @@ impl PrettyPrint for Inst {
show_ireg_sized(rhs_dst.to_reg(), mb_rru, 8),
),
Inst::XmmRmRImm { op, src, dst, imm, is64 } => format!(
Inst::XmmRmRImm { op, src, dst, imm, is64, .. } => format!(
"{} ${}, {}, {}",
ljustify(format!("{}{}", op.to_string(), if *is64 { ".w" } else { "" })),
imm,
@@ -2595,6 +2627,7 @@ impl MachInst for Inst {
SseOpcode::Xorps,
RegMem::reg(to_reg.to_reg()),
to_reg,
None,
));
} else {
let tmp = alloc_tmp(RegClass::I64, types::I32);
@@ -2613,6 +2646,7 @@ impl MachInst for Inst {
SseOpcode::Xorpd,
RegMem::reg(to_reg.to_reg()),
to_reg,
None,
));
} else {
let tmp = alloc_tmp(RegClass::I64, types::I64);