[machinst x64]: add source locations to more instruction formats

In order to register traps for `load_splat`, several instruction formats need knowledge of `SourceLoc`s. However, because the x64 backend does not correctly and completely register traps for `RegMem::Mem` variants, I opened https://github.com/bytecodealliance/wasmtime/issues/2290 to discuss and resolve that issue. In the meantime, the current behavior (i.e. remaining largely unaware of `SourceLoc`s) is retained: the new `srcloc: Option<SourceLoc>` fields are threaded through `XmmRmR` and `XmmRmRImm`, but existing call sites pass `None`.
This commit is contained in:
Andrew Brown
2020-10-13 10:02:12 -07:00
parent e0b911a4df
commit d990dd4c9a
4 changed files with 303 additions and 145 deletions

View File

@@ -1728,6 +1728,7 @@ pub(crate) fn emit(
op, op,
src: src_e, src: src_e,
dst: reg_g, dst: reg_g,
srcloc,
} => { } => {
let rex = RexFlags::clear_w(); let rex = RexFlags::clear_w();
let (prefix, opcode, length) = match op { let (prefix, opcode, length) = match op {
@@ -1819,6 +1820,10 @@ pub(crate) fn emit(
emit_std_reg_reg(sink, prefix, opcode, length, reg_g.to_reg(), *reg_e, rex); emit_std_reg_reg(sink, prefix, opcode, length, reg_g.to_reg(), *reg_e, rex);
} }
RegMem::Mem { addr } => { RegMem::Mem { addr } => {
if let Some(srcloc) = *srcloc {
// Register the offset at which the actual load instruction starts.
sink.add_trap(srcloc, TrapCode::HeapOutOfBounds);
}
let addr = &addr.finalize(state); let addr = &addr.finalize(state);
emit_std_reg_mem(sink, prefix, opcode, length, reg_g.to_reg(), addr, rex); emit_std_reg_mem(sink, prefix, opcode, length, reg_g.to_reg(), addr, rex);
} }
@@ -1889,7 +1894,7 @@ pub(crate) fn emit(
// and negative zero. These instructions merge the sign bits in that // and negative zero. These instructions merge the sign bits in that
// case, and are no-ops otherwise. // case, and are no-ops otherwise.
let op = if *is_min { or_op } else { and_op }; let op = if *is_min { or_op } else { and_op };
let inst = Inst::xmm_rm_r(op, RegMem::reg(*lhs), *rhs_dst); let inst = Inst::xmm_rm_r(op, RegMem::reg(*lhs), *rhs_dst, None);
inst.emit(sink, info, state); inst.emit(sink, info, state);
let inst = Inst::jmp_known(done); let inst = Inst::jmp_known(done);
@@ -1899,13 +1904,13 @@ pub(crate) fn emit(
// read-only operand: perform an addition between the two operands, which has the // read-only operand: perform an addition between the two operands, which has the
// desired NaN propagation effects. // desired NaN propagation effects.
sink.bind_label(propagate_nan); sink.bind_label(propagate_nan);
let inst = Inst::xmm_rm_r(add_op, RegMem::reg(*lhs), *rhs_dst); let inst = Inst::xmm_rm_r(add_op, RegMem::reg(*lhs), *rhs_dst, None);
inst.emit(sink, info, state); inst.emit(sink, info, state);
one_way_jmp(sink, CC::P, done); one_way_jmp(sink, CC::P, done);
sink.bind_label(do_min_max); sink.bind_label(do_min_max);
let inst = Inst::xmm_rm_r(min_max_op, RegMem::reg(*lhs), *rhs_dst); let inst = Inst::xmm_rm_r(min_max_op, RegMem::reg(*lhs), *rhs_dst, None);
inst.emit(sink, info, state); inst.emit(sink, info, state);
sink.bind_label(done); sink.bind_label(done);
@@ -1916,7 +1921,8 @@ pub(crate) fn emit(
src, src,
dst, dst,
imm, imm,
is64: w, is64,
srcloc,
} => { } => {
let (prefix, opcode, len) = match op { let (prefix, opcode, len) = match op {
SseOpcode::Cmpps => (LegacyPrefixes::None, 0x0FC2, 2), SseOpcode::Cmpps => (LegacyPrefixes::None, 0x0FC2, 2),
@@ -1933,7 +1939,7 @@ pub(crate) fn emit(
SseOpcode::Pshufd => (LegacyPrefixes::_66, 0x0F70, 2), SseOpcode::Pshufd => (LegacyPrefixes::_66, 0x0F70, 2),
_ => unimplemented!("Opcode {:?} not implemented", op), _ => unimplemented!("Opcode {:?} not implemented", op),
}; };
let rex = if *w { let rex = if *is64 {
RexFlags::set_w() RexFlags::set_w()
} else { } else {
RexFlags::clear_w() RexFlags::clear_w()
@@ -1955,6 +1961,10 @@ pub(crate) fn emit(
} }
} }
RegMem::Mem { addr } => { RegMem::Mem { addr } => {
if let Some(srcloc) = *srcloc {
// Register the offset at which the actual load instruction starts.
sink.add_trap(srcloc, TrapCode::HeapOutOfBounds);
}
let addr = &addr.finalize(state); let addr = &addr.finalize(state);
assert!( assert!(
!regs_swapped, !regs_swapped,
@@ -1963,7 +1973,7 @@ pub(crate) fn emit(
emit_std_reg_mem(sink, prefix, opcode, len, dst.to_reg(), addr, rex); emit_std_reg_mem(sink, prefix, opcode, len, dst.to_reg(), addr, rex);
} }
} }
sink.put1(*imm) sink.put1(*imm);
} }
Inst::XmmLoadConstSeq { val, dst, ty } => { Inst::XmmLoadConstSeq { val, dst, ty } => {
@@ -2188,7 +2198,7 @@ pub(crate) fn emit(
} else { } else {
SseOpcode::Addss SseOpcode::Addss
}; };
let inst = Inst::xmm_rm_r(add_op, RegMem::reg(dst.to_reg()), *dst); let inst = Inst::xmm_rm_r(add_op, RegMem::reg(dst.to_reg()), *dst, None);
inst.emit(sink, info, state); inst.emit(sink, info, state);
sink.bind_label(done); sink.bind_label(done);
@@ -2295,8 +2305,12 @@ pub(crate) fn emit(
// If the input was positive, saturate to INT_MAX. // If the input was positive, saturate to INT_MAX.
// Zero out tmp_xmm. // Zero out tmp_xmm.
let inst = let inst = Inst::xmm_rm_r(
Inst::xmm_rm_r(SseOpcode::Xorpd, RegMem::reg(tmp_xmm.to_reg()), *tmp_xmm); SseOpcode::Xorpd,
RegMem::reg(tmp_xmm.to_reg()),
*tmp_xmm,
None,
);
inst.emit(sink, info, state); inst.emit(sink, info, state);
let inst = Inst::xmm_cmp_rm_r(cmp_op, RegMem::reg(src), tmp_xmm.to_reg()); let inst = Inst::xmm_cmp_rm_r(cmp_op, RegMem::reg(src), tmp_xmm.to_reg());
@@ -2367,8 +2381,12 @@ pub(crate) fn emit(
sink.bind_label(check_positive); sink.bind_label(check_positive);
// Zero out the tmp_xmm register. // Zero out the tmp_xmm register.
let inst = let inst = Inst::xmm_rm_r(
Inst::xmm_rm_r(SseOpcode::Xorpd, RegMem::reg(tmp_xmm.to_reg()), *tmp_xmm); SseOpcode::Xorpd,
RegMem::reg(tmp_xmm.to_reg()),
*tmp_xmm,
None,
);
inst.emit(sink, info, state); inst.emit(sink, info, state);
let inst = Inst::xmm_cmp_rm_r(cmp_op, RegMem::reg(src), tmp_xmm.to_reg()); let inst = Inst::xmm_cmp_rm_r(cmp_op, RegMem::reg(src), tmp_xmm.to_reg());
@@ -2522,7 +2540,7 @@ pub(crate) fn emit(
sink.bind_label(handle_large); sink.bind_label(handle_large);
let inst = Inst::xmm_rm_r(sub_op, RegMem::reg(tmp_xmm.to_reg()), *src); let inst = Inst::xmm_rm_r(sub_op, RegMem::reg(tmp_xmm.to_reg()), *src, None);
inst.emit(sink, info, state); inst.emit(sink, info, state);
let inst = Inst::xmm_to_gpr(trunc_op, src.to_reg(), *dst, *dst_size); let inst = Inst::xmm_to_gpr(trunc_op, src.to_reg(), *dst, *dst_size);

View File

@@ -2983,12 +2983,12 @@ fn test_x64_emit() {
// XMM_RM_R: float binary ops // XMM_RM_R: float binary ops
insns.push(( insns.push((
Inst::xmm_rm_r(SseOpcode::Addss, RegMem::reg(xmm1), w_xmm0), Inst::xmm_rm_r(SseOpcode::Addss, RegMem::reg(xmm1), w_xmm0, None),
"F30F58C1", "F30F58C1",
"addss %xmm1, %xmm0", "addss %xmm1, %xmm0",
)); ));
insns.push(( insns.push((
Inst::xmm_rm_r(SseOpcode::Addss, RegMem::reg(xmm11), w_xmm13), Inst::xmm_rm_r(SseOpcode::Addss, RegMem::reg(xmm11), w_xmm13, None),
"F3450F58EB", "F3450F58EB",
"addss %xmm11, %xmm13", "addss %xmm11, %xmm13",
)); ));
@@ -2997,23 +2997,24 @@ fn test_x64_emit() {
SseOpcode::Addss, SseOpcode::Addss,
RegMem::mem(Amode::imm_reg_reg_shift(123, r10, rdx, 2)), RegMem::mem(Amode::imm_reg_reg_shift(123, r10, rdx, 2)),
w_xmm0, w_xmm0,
None,
), ),
"F3410F5844927B", "F3410F5844927B",
"addss 123(%r10,%rdx,4), %xmm0", "addss 123(%r10,%rdx,4), %xmm0",
)); ));
insns.push(( insns.push((
Inst::xmm_rm_r(SseOpcode::Addsd, RegMem::reg(xmm15), w_xmm4), Inst::xmm_rm_r(SseOpcode::Addsd, RegMem::reg(xmm15), w_xmm4, None),
"F2410F58E7", "F2410F58E7",
"addsd %xmm15, %xmm4", "addsd %xmm15, %xmm4",
)); ));
insns.push(( insns.push((
Inst::xmm_rm_r(SseOpcode::Subss, RegMem::reg(xmm0), w_xmm1), Inst::xmm_rm_r(SseOpcode::Subss, RegMem::reg(xmm0), w_xmm1, None),
"F30F5CC8", "F30F5CC8",
"subss %xmm0, %xmm1", "subss %xmm0, %xmm1",
)); ));
insns.push(( insns.push((
Inst::xmm_rm_r(SseOpcode::Subss, RegMem::reg(xmm12), w_xmm1), Inst::xmm_rm_r(SseOpcode::Subss, RegMem::reg(xmm12), w_xmm1, None),
"F3410F5CCC", "F3410F5CCC",
"subss %xmm12, %xmm1", "subss %xmm12, %xmm1",
)); ));
@@ -3022,57 +3023,58 @@ fn test_x64_emit() {
SseOpcode::Subss, SseOpcode::Subss,
RegMem::mem(Amode::imm_reg_reg_shift(321, r10, rax, 3)), RegMem::mem(Amode::imm_reg_reg_shift(321, r10, rax, 3)),
w_xmm10, w_xmm10,
None,
), ),
"F3450F5C94C241010000", "F3450F5C94C241010000",
"subss 321(%r10,%rax,8), %xmm10", "subss 321(%r10,%rax,8), %xmm10",
)); ));
insns.push(( insns.push((
Inst::xmm_rm_r(SseOpcode::Subsd, RegMem::reg(xmm5), w_xmm14), Inst::xmm_rm_r(SseOpcode::Subsd, RegMem::reg(xmm5), w_xmm14, None),
"F2440F5CF5", "F2440F5CF5",
"subsd %xmm5, %xmm14", "subsd %xmm5, %xmm14",
)); ));
insns.push(( insns.push((
Inst::xmm_rm_r(SseOpcode::Mulss, RegMem::reg(xmm5), w_xmm4), Inst::xmm_rm_r(SseOpcode::Mulss, RegMem::reg(xmm5), w_xmm4, None),
"F30F59E5", "F30F59E5",
"mulss %xmm5, %xmm4", "mulss %xmm5, %xmm4",
)); ));
insns.push(( insns.push((
Inst::xmm_rm_r(SseOpcode::Mulsd, RegMem::reg(xmm5), w_xmm4), Inst::xmm_rm_r(SseOpcode::Mulsd, RegMem::reg(xmm5), w_xmm4, None),
"F20F59E5", "F20F59E5",
"mulsd %xmm5, %xmm4", "mulsd %xmm5, %xmm4",
)); ));
insns.push(( insns.push((
Inst::xmm_rm_r(SseOpcode::Divss, RegMem::reg(xmm8), w_xmm7), Inst::xmm_rm_r(SseOpcode::Divss, RegMem::reg(xmm8), w_xmm7, None),
"F3410F5EF8", "F3410F5EF8",
"divss %xmm8, %xmm7", "divss %xmm8, %xmm7",
)); ));
insns.push(( insns.push((
Inst::xmm_rm_r(SseOpcode::Divsd, RegMem::reg(xmm5), w_xmm4), Inst::xmm_rm_r(SseOpcode::Divsd, RegMem::reg(xmm5), w_xmm4, None),
"F20F5EE5", "F20F5EE5",
"divsd %xmm5, %xmm4", "divsd %xmm5, %xmm4",
)); ));
insns.push(( insns.push((
Inst::xmm_rm_r(SseOpcode::Andps, RegMem::reg(xmm3), w_xmm12), Inst::xmm_rm_r(SseOpcode::Andps, RegMem::reg(xmm3), w_xmm12, None),
"440F54E3", "440F54E3",
"andps %xmm3, %xmm12", "andps %xmm3, %xmm12",
)); ));
insns.push(( insns.push((
Inst::xmm_rm_r(SseOpcode::Andnps, RegMem::reg(xmm4), w_xmm11), Inst::xmm_rm_r(SseOpcode::Andnps, RegMem::reg(xmm4), w_xmm11, None),
"440F55DC", "440F55DC",
"andnps %xmm4, %xmm11", "andnps %xmm4, %xmm11",
)); ));
insns.push(( insns.push((
Inst::xmm_rm_r(SseOpcode::Orps, RegMem::reg(xmm1), w_xmm15), Inst::xmm_rm_r(SseOpcode::Orps, RegMem::reg(xmm1), w_xmm15, None),
"440F56F9", "440F56F9",
"orps %xmm1, %xmm15", "orps %xmm1, %xmm15",
)); ));
insns.push(( insns.push((
Inst::xmm_rm_r(SseOpcode::Orps, RegMem::reg(xmm5), w_xmm4), Inst::xmm_rm_r(SseOpcode::Orps, RegMem::reg(xmm5), w_xmm4, None),
"0F56E5", "0F56E5",
"orps %xmm5, %xmm4", "orps %xmm5, %xmm4",
)); ));
@@ -3081,211 +3083,211 @@ fn test_x64_emit() {
// XMM_RM_R: Integer Packed // XMM_RM_R: Integer Packed
insns.push(( insns.push((
Inst::xmm_rm_r(SseOpcode::Paddb, RegMem::reg(xmm9), w_xmm5), Inst::xmm_rm_r(SseOpcode::Paddb, RegMem::reg(xmm9), w_xmm5, None),
"66410FFCE9", "66410FFCE9",
"paddb %xmm9, %xmm5", "paddb %xmm9, %xmm5",
)); ));
insns.push(( insns.push((
Inst::xmm_rm_r(SseOpcode::Paddw, RegMem::reg(xmm7), w_xmm6), Inst::xmm_rm_r(SseOpcode::Paddw, RegMem::reg(xmm7), w_xmm6, None),
"660FFDF7", "660FFDF7",
"paddw %xmm7, %xmm6", "paddw %xmm7, %xmm6",
)); ));
insns.push(( insns.push((
Inst::xmm_rm_r(SseOpcode::Paddd, RegMem::reg(xmm12), w_xmm13), Inst::xmm_rm_r(SseOpcode::Paddd, RegMem::reg(xmm12), w_xmm13, None),
"66450FFEEC", "66450FFEEC",
"paddd %xmm12, %xmm13", "paddd %xmm12, %xmm13",
)); ));
insns.push(( insns.push((
Inst::xmm_rm_r(SseOpcode::Paddq, RegMem::reg(xmm1), w_xmm8), Inst::xmm_rm_r(SseOpcode::Paddq, RegMem::reg(xmm1), w_xmm8, None),
"66440FD4C1", "66440FD4C1",
"paddq %xmm1, %xmm8", "paddq %xmm1, %xmm8",
)); ));
insns.push(( insns.push((
Inst::xmm_rm_r(SseOpcode::Paddsb, RegMem::reg(xmm9), w_xmm5), Inst::xmm_rm_r(SseOpcode::Paddsb, RegMem::reg(xmm9), w_xmm5, None),
"66410FECE9", "66410FECE9",
"paddsb %xmm9, %xmm5", "paddsb %xmm9, %xmm5",
)); ));
insns.push(( insns.push((
Inst::xmm_rm_r(SseOpcode::Paddsw, RegMem::reg(xmm7), w_xmm6), Inst::xmm_rm_r(SseOpcode::Paddsw, RegMem::reg(xmm7), w_xmm6, None),
"660FEDF7", "660FEDF7",
"paddsw %xmm7, %xmm6", "paddsw %xmm7, %xmm6",
)); ));
insns.push(( insns.push((
Inst::xmm_rm_r(SseOpcode::Paddusb, RegMem::reg(xmm12), w_xmm13), Inst::xmm_rm_r(SseOpcode::Paddusb, RegMem::reg(xmm12), w_xmm13, None),
"66450FDCEC", "66450FDCEC",
"paddusb %xmm12, %xmm13", "paddusb %xmm12, %xmm13",
)); ));
insns.push(( insns.push((
Inst::xmm_rm_r(SseOpcode::Paddusw, RegMem::reg(xmm1), w_xmm8), Inst::xmm_rm_r(SseOpcode::Paddusw, RegMem::reg(xmm1), w_xmm8, None),
"66440FDDC1", "66440FDDC1",
"paddusw %xmm1, %xmm8", "paddusw %xmm1, %xmm8",
)); ));
insns.push(( insns.push((
Inst::xmm_rm_r(SseOpcode::Psubsb, RegMem::reg(xmm9), w_xmm5), Inst::xmm_rm_r(SseOpcode::Psubsb, RegMem::reg(xmm9), w_xmm5, None),
"66410FE8E9", "66410FE8E9",
"psubsb %xmm9, %xmm5", "psubsb %xmm9, %xmm5",
)); ));
insns.push(( insns.push((
Inst::xmm_rm_r(SseOpcode::Psubsw, RegMem::reg(xmm7), w_xmm6), Inst::xmm_rm_r(SseOpcode::Psubsw, RegMem::reg(xmm7), w_xmm6, None),
"660FE9F7", "660FE9F7",
"psubsw %xmm7, %xmm6", "psubsw %xmm7, %xmm6",
)); ));
insns.push(( insns.push((
Inst::xmm_rm_r(SseOpcode::Psubusb, RegMem::reg(xmm12), w_xmm13), Inst::xmm_rm_r(SseOpcode::Psubusb, RegMem::reg(xmm12), w_xmm13, None),
"66450FD8EC", "66450FD8EC",
"psubusb %xmm12, %xmm13", "psubusb %xmm12, %xmm13",
)); ));
insns.push(( insns.push((
Inst::xmm_rm_r(SseOpcode::Psubusw, RegMem::reg(xmm1), w_xmm8), Inst::xmm_rm_r(SseOpcode::Psubusw, RegMem::reg(xmm1), w_xmm8, None),
"66440FD9C1", "66440FD9C1",
"psubusw %xmm1, %xmm8", "psubusw %xmm1, %xmm8",
)); ));
insns.push(( insns.push((
Inst::xmm_rm_r(SseOpcode::Pavgb, RegMem::reg(xmm12), w_xmm13), Inst::xmm_rm_r(SseOpcode::Pavgb, RegMem::reg(xmm12), w_xmm13, None),
"66450FE0EC", "66450FE0EC",
"pavgb %xmm12, %xmm13", "pavgb %xmm12, %xmm13",
)); ));
insns.push(( insns.push((
Inst::xmm_rm_r(SseOpcode::Pavgw, RegMem::reg(xmm1), w_xmm8), Inst::xmm_rm_r(SseOpcode::Pavgw, RegMem::reg(xmm1), w_xmm8, None),
"66440FE3C1", "66440FE3C1",
"pavgw %xmm1, %xmm8", "pavgw %xmm1, %xmm8",
)); ));
insns.push(( insns.push((
Inst::xmm_rm_r(SseOpcode::Psubb, RegMem::reg(xmm5), w_xmm9), Inst::xmm_rm_r(SseOpcode::Psubb, RegMem::reg(xmm5), w_xmm9, None),
"66440FF8CD", "66440FF8CD",
"psubb %xmm5, %xmm9", "psubb %xmm5, %xmm9",
)); ));
insns.push(( insns.push((
Inst::xmm_rm_r(SseOpcode::Psubw, RegMem::reg(xmm6), w_xmm7), Inst::xmm_rm_r(SseOpcode::Psubw, RegMem::reg(xmm6), w_xmm7, None),
"660FF9FE", "660FF9FE",
"psubw %xmm6, %xmm7", "psubw %xmm6, %xmm7",
)); ));
insns.push(( insns.push((
Inst::xmm_rm_r(SseOpcode::Psubd, RegMem::reg(xmm13), w_xmm12), Inst::xmm_rm_r(SseOpcode::Psubd, RegMem::reg(xmm13), w_xmm12, None),
"66450FFAE5", "66450FFAE5",
"psubd %xmm13, %xmm12", "psubd %xmm13, %xmm12",
)); ));
insns.push(( insns.push((
Inst::xmm_rm_r(SseOpcode::Psubq, RegMem::reg(xmm8), w_xmm1), Inst::xmm_rm_r(SseOpcode::Psubq, RegMem::reg(xmm8), w_xmm1, None),
"66410FFBC8", "66410FFBC8",
"psubq %xmm8, %xmm1", "psubq %xmm8, %xmm1",
)); ));
insns.push(( insns.push((
Inst::xmm_rm_r(SseOpcode::Pmulld, RegMem::reg(xmm15), w_xmm6), Inst::xmm_rm_r(SseOpcode::Pmulld, RegMem::reg(xmm15), w_xmm6, None),
"66410F3840F7", "66410F3840F7",
"pmulld %xmm15, %xmm6", "pmulld %xmm15, %xmm6",
)); ));
insns.push(( insns.push((
Inst::xmm_rm_r(SseOpcode::Pmullw, RegMem::reg(xmm14), w_xmm1), Inst::xmm_rm_r(SseOpcode::Pmullw, RegMem::reg(xmm14), w_xmm1, None),
"66410FD5CE", "66410FD5CE",
"pmullw %xmm14, %xmm1", "pmullw %xmm14, %xmm1",
)); ));
insns.push(( insns.push((
Inst::xmm_rm_r(SseOpcode::Pmuludq, RegMem::reg(xmm8), w_xmm9), Inst::xmm_rm_r(SseOpcode::Pmuludq, RegMem::reg(xmm8), w_xmm9, None),
"66450FF4C8", "66450FF4C8",
"pmuludq %xmm8, %xmm9", "pmuludq %xmm8, %xmm9",
)); ));
insns.push(( insns.push((
Inst::xmm_rm_r(SseOpcode::Pmaxsb, RegMem::reg(xmm15), w_xmm6), Inst::xmm_rm_r(SseOpcode::Pmaxsb, RegMem::reg(xmm15), w_xmm6, None),
"66410F383CF7", "66410F383CF7",
"pmaxsb %xmm15, %xmm6", "pmaxsb %xmm15, %xmm6",
)); ));
insns.push(( insns.push((
Inst::xmm_rm_r(SseOpcode::Pmaxsw, RegMem::reg(xmm15), w_xmm6), Inst::xmm_rm_r(SseOpcode::Pmaxsw, RegMem::reg(xmm15), w_xmm6, None),
"66410FEEF7", "66410FEEF7",
"pmaxsw %xmm15, %xmm6", "pmaxsw %xmm15, %xmm6",
)); ));
insns.push(( insns.push((
Inst::xmm_rm_r(SseOpcode::Pmaxsd, RegMem::reg(xmm15), w_xmm6), Inst::xmm_rm_r(SseOpcode::Pmaxsd, RegMem::reg(xmm15), w_xmm6, None),
"66410F383DF7", "66410F383DF7",
"pmaxsd %xmm15, %xmm6", "pmaxsd %xmm15, %xmm6",
)); ));
insns.push(( insns.push((
Inst::xmm_rm_r(SseOpcode::Pmaxub, RegMem::reg(xmm14), w_xmm1), Inst::xmm_rm_r(SseOpcode::Pmaxub, RegMem::reg(xmm14), w_xmm1, None),
"66410FDECE", "66410FDECE",
"pmaxub %xmm14, %xmm1", "pmaxub %xmm14, %xmm1",
)); ));
insns.push(( insns.push((
Inst::xmm_rm_r(SseOpcode::Pmaxuw, RegMem::reg(xmm14), w_xmm1), Inst::xmm_rm_r(SseOpcode::Pmaxuw, RegMem::reg(xmm14), w_xmm1, None),
"66410F383ECE", "66410F383ECE",
"pmaxuw %xmm14, %xmm1", "pmaxuw %xmm14, %xmm1",
)); ));
insns.push(( insns.push((
Inst::xmm_rm_r(SseOpcode::Pmaxud, RegMem::reg(xmm14), w_xmm1), Inst::xmm_rm_r(SseOpcode::Pmaxud, RegMem::reg(xmm14), w_xmm1, None),
"66410F383FCE", "66410F383FCE",
"pmaxud %xmm14, %xmm1", "pmaxud %xmm14, %xmm1",
)); ));
insns.push(( insns.push((
Inst::xmm_rm_r(SseOpcode::Pminsb, RegMem::reg(xmm8), w_xmm9), Inst::xmm_rm_r(SseOpcode::Pminsb, RegMem::reg(xmm8), w_xmm9, None),
"66450F3838C8", "66450F3838C8",
"pminsb %xmm8, %xmm9", "pminsb %xmm8, %xmm9",
)); ));
insns.push(( insns.push((
Inst::xmm_rm_r(SseOpcode::Pminsw, RegMem::reg(xmm8), w_xmm9), Inst::xmm_rm_r(SseOpcode::Pminsw, RegMem::reg(xmm8), w_xmm9, None),
"66450FEAC8", "66450FEAC8",
"pminsw %xmm8, %xmm9", "pminsw %xmm8, %xmm9",
)); ));
insns.push(( insns.push((
Inst::xmm_rm_r(SseOpcode::Pminsd, RegMem::reg(xmm8), w_xmm9), Inst::xmm_rm_r(SseOpcode::Pminsd, RegMem::reg(xmm8), w_xmm9, None),
"66450F3839C8", "66450F3839C8",
"pminsd %xmm8, %xmm9", "pminsd %xmm8, %xmm9",
)); ));
insns.push(( insns.push((
Inst::xmm_rm_r(SseOpcode::Pminub, RegMem::reg(xmm3), w_xmm2), Inst::xmm_rm_r(SseOpcode::Pminub, RegMem::reg(xmm3), w_xmm2, None),
"660FDAD3", "660FDAD3",
"pminub %xmm3, %xmm2", "pminub %xmm3, %xmm2",
)); ));
insns.push(( insns.push((
Inst::xmm_rm_r(SseOpcode::Pminuw, RegMem::reg(xmm3), w_xmm2), Inst::xmm_rm_r(SseOpcode::Pminuw, RegMem::reg(xmm3), w_xmm2, None),
"660F383AD3", "660F383AD3",
"pminuw %xmm3, %xmm2", "pminuw %xmm3, %xmm2",
)); ));
insns.push(( insns.push((
Inst::xmm_rm_r(SseOpcode::Pminud, RegMem::reg(xmm3), w_xmm2), Inst::xmm_rm_r(SseOpcode::Pminud, RegMem::reg(xmm3), w_xmm2, None),
"660F383BD3", "660F383BD3",
"pminud %xmm3, %xmm2", "pminud %xmm3, %xmm2",
)); ));
insns.push(( insns.push((
Inst::xmm_rm_r(SseOpcode::Pxor, RegMem::reg(xmm11), w_xmm2), Inst::xmm_rm_r(SseOpcode::Pxor, RegMem::reg(xmm11), w_xmm2, None),
"66410FEFD3", "66410FEFD3",
"pxor %xmm11, %xmm2", "pxor %xmm11, %xmm2",
)); ));
insns.push(( insns.push((
Inst::xmm_rm_r(SseOpcode::Pshufb, RegMem::reg(xmm11), w_xmm2), Inst::xmm_rm_r(SseOpcode::Pshufb, RegMem::reg(xmm11), w_xmm2, None),
"66410F3800D3", "66410F3800D3",
"pshufb %xmm11, %xmm2", "pshufb %xmm11, %xmm2",
)); ));
@@ -3488,12 +3490,12 @@ fn test_x64_emit() {
// ======================================================== // ========================================================
// XmmRmRImm // XmmRmRImm
insns.push(( insns.push((
Inst::xmm_rm_r_imm(SseOpcode::Cmppd, RegMem::reg(xmm5), w_xmm1, 2, false), Inst::xmm_rm_r_imm(SseOpcode::Cmppd, RegMem::reg(xmm5), w_xmm1, 2, false, None),
"660FC2CD02", "660FC2CD02",
"cmppd $2, %xmm5, %xmm1", "cmppd $2, %xmm5, %xmm1",
)); ));
insns.push(( insns.push((
Inst::xmm_rm_r_imm(SseOpcode::Cmpps, RegMem::reg(xmm15), w_xmm7, 0, false), Inst::xmm_rm_r_imm(SseOpcode::Cmpps, RegMem::reg(xmm15), w_xmm7, 0, false, None),
"410FC2FF00", "410FC2FF00",
"cmpps $0, %xmm15, %xmm7", "cmpps $0, %xmm15, %xmm7",
)); ));

View File

@@ -212,6 +212,7 @@ pub enum Inst {
op: SseOpcode, op: SseOpcode,
src: RegMem, src: RegMem,
dst: Writable<Reg>, dst: Writable<Reg>,
srcloc: Option<SourceLoc>,
}, },
/// XMM (scalar or vector) unary op: mov between XMM registers (32 64) (reg addr) reg, sqrt, /// XMM (scalar or vector) unary op: mov between XMM registers (32 64) (reg addr) reg, sqrt,
@@ -338,6 +339,7 @@ pub enum Inst {
dst: Writable<Reg>, dst: Writable<Reg>,
imm: u8, imm: u8,
is64: bool, is64: bool,
srcloc: Option<SourceLoc>,
}, },
// ===================================== // =====================================
@@ -711,10 +713,20 @@ impl Inst {
} }
} }
pub(crate) fn xmm_rm_r(op: SseOpcode, src: RegMem, dst: Writable<Reg>) -> Self { pub(crate) fn xmm_rm_r(
op: SseOpcode,
src: RegMem,
dst: Writable<Reg>,
srcloc: Option<SourceLoc>,
) -> Self {
src.assert_regclass_is(RegClass::V128); src.assert_regclass_is(RegClass::V128);
debug_assert!(dst.to_reg().get_class() == RegClass::V128); debug_assert!(dst.to_reg().get_class() == RegClass::V128);
Inst::XmmRmR { op, src, dst } Inst::XmmRmR {
op,
src,
dst,
srcloc,
}
} }
pub(crate) fn xmm_uninit_value(dst: Writable<Reg>) -> Self { pub(crate) fn xmm_uninit_value(dst: Writable<Reg>) -> Self {
@@ -869,6 +881,7 @@ impl Inst {
dst: Writable<Reg>, dst: Writable<Reg>,
imm: u8, imm: u8,
is64: bool, is64: bool,
srcloc: Option<SourceLoc>,
) -> Inst { ) -> Inst {
Inst::XmmRmRImm { Inst::XmmRmRImm {
op, op,
@@ -876,6 +889,7 @@ impl Inst {
dst, dst,
imm, imm,
is64, is64,
srcloc,
} }
} }
@@ -1233,16 +1247,26 @@ impl Inst {
/// Choose which instruction to use for comparing two values for equality. /// Choose which instruction to use for comparing two values for equality.
pub(crate) fn equals(ty: Type, from: RegMem, to: Writable<Reg>) -> Inst { pub(crate) fn equals(ty: Type, from: RegMem, to: Writable<Reg>) -> Inst {
match ty { match ty {
types::I8X16 | types::B8X16 => Inst::xmm_rm_r(SseOpcode::Pcmpeqb, from, to), types::I8X16 | types::B8X16 => Inst::xmm_rm_r(SseOpcode::Pcmpeqb, from, to, None),
types::I16X8 | types::B16X8 => Inst::xmm_rm_r(SseOpcode::Pcmpeqw, from, to), types::I16X8 | types::B16X8 => Inst::xmm_rm_r(SseOpcode::Pcmpeqw, from, to, None),
types::I32X4 | types::B32X4 => Inst::xmm_rm_r(SseOpcode::Pcmpeqd, from, to), types::I32X4 | types::B32X4 => Inst::xmm_rm_r(SseOpcode::Pcmpeqd, from, to, None),
types::I64X2 | types::B64X2 => Inst::xmm_rm_r(SseOpcode::Pcmpeqq, from, to), types::I64X2 | types::B64X2 => Inst::xmm_rm_r(SseOpcode::Pcmpeqq, from, to, None),
types::F32X4 => { types::F32X4 => Inst::xmm_rm_r_imm(
Inst::xmm_rm_r_imm(SseOpcode::Cmpps, from, to, FcmpImm::Equal.encode(), false) SseOpcode::Cmpps,
} from,
types::F64X2 => { to,
Inst::xmm_rm_r_imm(SseOpcode::Cmppd, from, to, FcmpImm::Equal.encode(), false) FcmpImm::Equal.encode(),
} false,
None,
),
types::F64X2 => Inst::xmm_rm_r_imm(
SseOpcode::Cmppd,
from,
to,
FcmpImm::Equal.encode(),
false,
None,
),
_ => unimplemented!("unimplemented type for Inst::equals: {}", ty), _ => unimplemented!("unimplemented type for Inst::equals: {}", ty),
} }
} }
@@ -1250,9 +1274,11 @@ impl Inst {
/// Choose which instruction to use for computing a bitwise AND on two values. /// Choose which instruction to use for computing a bitwise AND on two values.
pub(crate) fn and(ty: Type, from: RegMem, to: Writable<Reg>) -> Inst { pub(crate) fn and(ty: Type, from: RegMem, to: Writable<Reg>) -> Inst {
match ty { match ty {
types::F32X4 => Inst::xmm_rm_r(SseOpcode::Andps, from, to), types::F32X4 => Inst::xmm_rm_r(SseOpcode::Andps, from, to, None),
types::F64X2 => Inst::xmm_rm_r(SseOpcode::Andpd, from, to), types::F64X2 => Inst::xmm_rm_r(SseOpcode::Andpd, from, to, None),
_ if ty.is_vector() && ty.bits() == 128 => Inst::xmm_rm_r(SseOpcode::Pand, from, to), _ if ty.is_vector() && ty.bits() == 128 => {
Inst::xmm_rm_r(SseOpcode::Pand, from, to, None)
}
_ => unimplemented!("unimplemented type for Inst::and: {}", ty), _ => unimplemented!("unimplemented type for Inst::and: {}", ty),
} }
} }
@@ -1260,9 +1286,11 @@ impl Inst {
/// Choose which instruction to use for computing a bitwise AND NOT on two values. /// Choose which instruction to use for computing a bitwise AND NOT on two values.
pub(crate) fn and_not(ty: Type, from: RegMem, to: Writable<Reg>) -> Inst { pub(crate) fn and_not(ty: Type, from: RegMem, to: Writable<Reg>) -> Inst {
match ty { match ty {
types::F32X4 => Inst::xmm_rm_r(SseOpcode::Andnps, from, to), types::F32X4 => Inst::xmm_rm_r(SseOpcode::Andnps, from, to, None),
types::F64X2 => Inst::xmm_rm_r(SseOpcode::Andnpd, from, to), types::F64X2 => Inst::xmm_rm_r(SseOpcode::Andnpd, from, to, None),
_ if ty.is_vector() && ty.bits() == 128 => Inst::xmm_rm_r(SseOpcode::Pandn, from, to), _ if ty.is_vector() && ty.bits() == 128 => {
Inst::xmm_rm_r(SseOpcode::Pandn, from, to, None)
}
_ => unimplemented!("unimplemented type for Inst::and_not: {}", ty), _ => unimplemented!("unimplemented type for Inst::and_not: {}", ty),
} }
} }
@@ -1270,9 +1298,11 @@ impl Inst {
/// Choose which instruction to use for computing a bitwise OR on two values. /// Choose which instruction to use for computing a bitwise OR on two values.
pub(crate) fn or(ty: Type, from: RegMem, to: Writable<Reg>) -> Inst { pub(crate) fn or(ty: Type, from: RegMem, to: Writable<Reg>) -> Inst {
match ty { match ty {
types::F32X4 => Inst::xmm_rm_r(SseOpcode::Orps, from, to), types::F32X4 => Inst::xmm_rm_r(SseOpcode::Orps, from, to, None),
types::F64X2 => Inst::xmm_rm_r(SseOpcode::Orpd, from, to), types::F64X2 => Inst::xmm_rm_r(SseOpcode::Orpd, from, to, None),
_ if ty.is_vector() && ty.bits() == 128 => Inst::xmm_rm_r(SseOpcode::Por, from, to), _ if ty.is_vector() && ty.bits() == 128 => {
Inst::xmm_rm_r(SseOpcode::Por, from, to, None)
}
_ => unimplemented!("unimplemented type for Inst::or: {}", ty), _ => unimplemented!("unimplemented type for Inst::or: {}", ty),
} }
} }
@@ -1280,9 +1310,11 @@ impl Inst {
/// Choose which instruction to use for computing a bitwise XOR on two values. /// Choose which instruction to use for computing a bitwise XOR on two values.
pub(crate) fn xor(ty: Type, from: RegMem, to: Writable<Reg>) -> Inst { pub(crate) fn xor(ty: Type, from: RegMem, to: Writable<Reg>) -> Inst {
match ty { match ty {
types::F32X4 => Inst::xmm_rm_r(SseOpcode::Xorps, from, to), types::F32X4 => Inst::xmm_rm_r(SseOpcode::Xorps, from, to, None),
types::F64X2 => Inst::xmm_rm_r(SseOpcode::Xorpd, from, to), types::F64X2 => Inst::xmm_rm_r(SseOpcode::Xorpd, from, to, None),
_ if ty.is_vector() && ty.bits() == 128 => Inst::xmm_rm_r(SseOpcode::Pxor, from, to), _ if ty.is_vector() && ty.bits() == 128 => {
Inst::xmm_rm_r(SseOpcode::Pxor, from, to, None)
}
_ => unimplemented!("unimplemented type for Inst::xor: {}", ty), _ => unimplemented!("unimplemented type for Inst::xor: {}", ty),
} }
} }
@@ -1429,7 +1461,7 @@ impl PrettyPrint for Inst {
dst.show_rru(mb_rru), dst.show_rru(mb_rru),
), ),
Inst::XmmRmR { op, src, dst } => format!( Inst::XmmRmR { op, src, dst, .. } => format!(
"{} {}, {}", "{} {}, {}",
ljustify(op.to_string()), ljustify(op.to_string()),
src.show_rru_sized(mb_rru, 8), src.show_rru_sized(mb_rru, 8),
@@ -1459,7 +1491,7 @@ impl PrettyPrint for Inst {
show_ireg_sized(rhs_dst.to_reg(), mb_rru, 8), show_ireg_sized(rhs_dst.to_reg(), mb_rru, 8),
), ),
Inst::XmmRmRImm { op, src, dst, imm, is64 } => format!( Inst::XmmRmRImm { op, src, dst, imm, is64, .. } => format!(
"{} ${}, {}, {}", "{} ${}, {}, {}",
ljustify(format!("{}{}", op.to_string(), if *is64 { ".w" } else { "" })), ljustify(format!("{}{}", op.to_string(), if *is64 { ".w" } else { "" })),
imm, imm,
@@ -2595,6 +2627,7 @@ impl MachInst for Inst {
SseOpcode::Xorps, SseOpcode::Xorps,
RegMem::reg(to_reg.to_reg()), RegMem::reg(to_reg.to_reg()),
to_reg, to_reg,
None,
)); ));
} else { } else {
let tmp = alloc_tmp(RegClass::I64, types::I32); let tmp = alloc_tmp(RegClass::I64, types::I32);
@@ -2613,6 +2646,7 @@ impl MachInst for Inst {
SseOpcode::Xorpd, SseOpcode::Xorpd,
RegMem::reg(to_reg.to_reg()), RegMem::reg(to_reg.to_reg()),
to_reg, to_reg,
None,
)); ));
} else { } else {
let tmp = alloc_tmp(RegClass::I64, types::I64); let tmp = alloc_tmp(RegClass::I64, types::I64);

View File

@@ -3,7 +3,7 @@
use crate::data_value::DataValue; use crate::data_value::DataValue;
use crate::ir::{ use crate::ir::{
condcodes::FloatCC, condcodes::IntCC, types, AbiParam, ArgumentPurpose, ExternalName, condcodes::FloatCC, condcodes::IntCC, types, AbiParam, ArgumentPurpose, ExternalName,
Inst as IRInst, InstructionData, LibCall, Opcode, Signature, Type, Inst as IRInst, InstructionData, LibCall, Opcode, Signature, SourceLoc, Type,
}; };
use crate::isa::x64::abi::*; use crate::isa::x64::abi::*;
use crate::isa::x64::inst::args::*; use crate::isa::x64::inst::args::*;
@@ -227,6 +227,7 @@ fn emit_insert_lane<C: LowerCtx<I = Inst>>(
dst: Writable<Reg>, dst: Writable<Reg>,
lane: u8, lane: u8,
ty: Type, ty: Type,
srcloc: Option<SourceLoc>,
) { ) {
if !ty.is_float() { if !ty.is_float() {
let (sse_op, is64) = match ty.lane_bits() { let (sse_op, is64) = match ty.lane_bits() {
@@ -236,13 +237,13 @@ fn emit_insert_lane<C: LowerCtx<I = Inst>>(
64 => (SseOpcode::Pinsrd, true), 64 => (SseOpcode::Pinsrd, true),
_ => panic!("Unable to insertlane for lane size: {}", ty.lane_bits()), _ => panic!("Unable to insertlane for lane size: {}", ty.lane_bits()),
}; };
ctx.emit(Inst::xmm_rm_r_imm(sse_op, src, dst, lane, is64)); ctx.emit(Inst::xmm_rm_r_imm(sse_op, src, dst, lane, is64, srcloc));
} else if ty == types::F32 { } else if ty == types::F32 {
let sse_op = SseOpcode::Insertps; let sse_op = SseOpcode::Insertps;
// Insert 32-bits from replacement (at index 00, bits 7:8) to vector (lane // Insert 32-bits from replacement (at index 00, bits 7:8) to vector (lane
// shifted into bits 5:6). // shifted into bits 5:6).
let lane = 0b00_00_00_00 | lane << 4; let lane = 0b00_00_00_00 | lane << 4;
ctx.emit(Inst::xmm_rm_r_imm(sse_op, src, dst, lane, false)); ctx.emit(Inst::xmm_rm_r_imm(sse_op, src, dst, lane, false, srcloc));
} else if ty == types::F64 { } else if ty == types::F64 {
let sse_op = match lane { let sse_op = match lane {
// Move the lowest quadword in replacement to vector without changing // Move the lowest quadword in replacement to vector without changing
@@ -256,7 +257,7 @@ fn emit_insert_lane<C: LowerCtx<I = Inst>>(
// Here we use the `xmm_rm_r` encoding because it correctly tells the register // Here we use the `xmm_rm_r` encoding because it correctly tells the register
// allocator how we are using `dst`: we are using `dst` as a `mod` whereas other // allocator how we are using `dst`: we are using `dst` as a `mod` whereas other
// encoding formats like `xmm_unary_rm_r` treat it as a `def`. // encoding formats like `xmm_unary_rm_r` treat it as a `def`.
ctx.emit(Inst::xmm_rm_r(sse_op, src, dst)); ctx.emit(Inst::xmm_rm_r(sse_op, src, dst, srcloc));
} else { } else {
panic!("unable to emit insertlane for type: {}", ty) panic!("unable to emit insertlane for type: {}", ty)
} }
@@ -694,6 +695,7 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
SseOpcode::Pmuludq, SseOpcode::Pmuludq,
RegMem::reg(lhs.clone()), RegMem::reg(lhs.clone()),
rhs_1, rhs_1,
None,
)); ));
// B' = B // B' = B
@@ -707,7 +709,12 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
RegMemImm::imm(32), RegMemImm::imm(32),
lhs_1, lhs_1,
)); ));
ctx.emit(Inst::xmm_rm_r(SseOpcode::Pmuludq, RegMem::reg(rhs), lhs_1)); ctx.emit(Inst::xmm_rm_r(
SseOpcode::Pmuludq,
RegMem::reg(rhs),
lhs_1,
None,
));
// B' = B' + A' // B' = B' + A'
// B' = B' << 32 // B' = B' << 32
@@ -715,6 +722,7 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
SseOpcode::Paddq, SseOpcode::Paddq,
RegMem::reg(rhs_1.to_reg()), RegMem::reg(rhs_1.to_reg()),
lhs_1, lhs_1,
None,
)); ));
ctx.emit(Inst::xmm_rmi_reg( ctx.emit(Inst::xmm_rmi_reg(
SseOpcode::Psllq, SseOpcode::Psllq,
@@ -731,11 +739,13 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
SseOpcode::Pmuludq, SseOpcode::Pmuludq,
RegMem::reg(lhs.clone()), RegMem::reg(lhs.clone()),
rhs_1, rhs_1,
None,
)); ));
ctx.emit(Inst::xmm_rm_r( ctx.emit(Inst::xmm_rm_r(
SseOpcode::Paddq, SseOpcode::Paddq,
RegMem::reg(lhs_1.to_reg()), RegMem::reg(lhs_1.to_reg()),
rhs_1, rhs_1,
None,
)); ));
ctx.emit(Inst::gen_move(dst, rhs_1.to_reg(), ty)); ctx.emit(Inst::gen_move(dst, rhs_1.to_reg(), ty));
return Ok(()); return Ok(());
@@ -770,7 +780,7 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
// Move the `lhs` to the same register as `dst`. // Move the `lhs` to the same register as `dst`.
ctx.emit(Inst::gen_move(dst, lhs, ty)); ctx.emit(Inst::gen_move(dst, lhs, ty));
ctx.emit(Inst::xmm_rm_r(sse_op, rhs, dst)); ctx.emit(Inst::xmm_rm_r(sse_op, rhs, dst, None));
} else { } else {
let is_64 = ty == types::I64; let is_64 = ty == types::I64;
let alu_op = match op { let alu_op = match op {
@@ -828,7 +838,7 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
// Note the flipping of operands: the `rhs` operand is used as the destination instead // Note the flipping of operands: the `rhs` operand is used as the destination instead
// of the `lhs` as in the other bit operations above (e.g. `band`). // of the `lhs` as in the other bit operations above (e.g. `band`).
ctx.emit(Inst::gen_move(dst, rhs, ty)); ctx.emit(Inst::gen_move(dst, rhs, ty));
ctx.emit(Inst::xmm_rm_r(sse_op, lhs, dst)); ctx.emit(Inst::xmm_rm_r(sse_op, lhs, dst, None));
} }
Opcode::Iabs => { Opcode::Iabs => {
@@ -884,7 +894,7 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
// Move the `lhs` to the same register as `dst`. // Move the `lhs` to the same register as `dst`.
ctx.emit(Inst::gen_move(dst, lhs, ty)); ctx.emit(Inst::gen_move(dst, lhs, ty));
ctx.emit(Inst::xmm_rm_r(sse_op, rhs, dst)); ctx.emit(Inst::xmm_rm_r(sse_op, rhs, dst, None));
} else { } else {
panic!("Unsupported type for {} instruction: {}", op, ty); panic!("Unsupported type for {} instruction: {}", op, ty);
} }
@@ -1007,8 +1017,9 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
SseOpcode::Pxor, SseOpcode::Pxor,
RegMem::reg(tmp.to_reg()), RegMem::reg(tmp.to_reg()),
tmp, tmp,
None,
)); ));
ctx.emit(Inst::xmm_rm_r(subtract_opcode, src, tmp)); ctx.emit(Inst::xmm_rm_r(subtract_opcode, src, tmp, None));
ctx.emit(Inst::xmm_unary_rm_r( ctx.emit(Inst::xmm_unary_rm_r(
SseOpcode::Movapd, SseOpcode::Movapd,
RegMem::reg(tmp.to_reg()), RegMem::reg(tmp.to_reg()),
@@ -1561,34 +1572,44 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
}; };
match condcode { match condcode {
IntCC::Equal => ctx.emit(Inst::xmm_rm_r(eq(ty), input, dst)), IntCC::Equal => ctx.emit(Inst::xmm_rm_r(eq(ty), input, dst, None)),
IntCC::NotEqual => { IntCC::NotEqual => {
ctx.emit(Inst::xmm_rm_r(eq(ty), input, dst)); ctx.emit(Inst::xmm_rm_r(eq(ty), input, dst, None));
// Emit all 1s into the `tmp` register. // Emit all 1s into the `tmp` register.
let tmp = ctx.alloc_tmp(RegClass::V128, ty); let tmp = ctx.alloc_tmp(RegClass::V128, ty);
ctx.emit(Inst::xmm_rm_r(eq(ty), RegMem::from(tmp), tmp)); ctx.emit(Inst::xmm_rm_r(eq(ty), RegMem::from(tmp), tmp, None));
// Invert the result of the `PCMPEQ*`. // Invert the result of the `PCMPEQ*`.
ctx.emit(Inst::xmm_rm_r(SseOpcode::Pxor, RegMem::from(tmp), dst)); ctx.emit(Inst::xmm_rm_r(
SseOpcode::Pxor,
RegMem::from(tmp),
dst,
None,
));
} }
IntCC::SignedGreaterThan | IntCC::SignedLessThan => { IntCC::SignedGreaterThan | IntCC::SignedLessThan => {
ctx.emit(Inst::xmm_rm_r(gt(ty), input, dst)) ctx.emit(Inst::xmm_rm_r(gt(ty), input, dst, None))
} }
IntCC::SignedGreaterThanOrEqual | IntCC::SignedLessThanOrEqual => { IntCC::SignedGreaterThanOrEqual | IntCC::SignedLessThanOrEqual => {
ctx.emit(Inst::xmm_rm_r(mins(ty), input.clone(), dst)); ctx.emit(Inst::xmm_rm_r(mins(ty), input.clone(), dst, None));
ctx.emit(Inst::xmm_rm_r(eq(ty), input, dst)) ctx.emit(Inst::xmm_rm_r(eq(ty), input, dst, None))
} }
IntCC::UnsignedGreaterThan | IntCC::UnsignedLessThan => { IntCC::UnsignedGreaterThan | IntCC::UnsignedLessThan => {
ctx.emit(Inst::xmm_rm_r(maxu(ty), input.clone(), dst)); ctx.emit(Inst::xmm_rm_r(maxu(ty), input.clone(), dst, None));
ctx.emit(Inst::xmm_rm_r(eq(ty), input, dst)); ctx.emit(Inst::xmm_rm_r(eq(ty), input, dst, None));
// Emit all 1s into the `tmp` register. // Emit all 1s into the `tmp` register.
let tmp = ctx.alloc_tmp(RegClass::V128, ty); let tmp = ctx.alloc_tmp(RegClass::V128, ty);
ctx.emit(Inst::xmm_rm_r(eq(ty), RegMem::from(tmp), tmp)); ctx.emit(Inst::xmm_rm_r(eq(ty), RegMem::from(tmp), tmp, None));
// Invert the result of the `PCMPEQ*`. // Invert the result of the `PCMPEQ*`.
ctx.emit(Inst::xmm_rm_r(SseOpcode::Pxor, RegMem::from(tmp), dst)); ctx.emit(Inst::xmm_rm_r(
SseOpcode::Pxor,
RegMem::from(tmp),
dst,
None,
));
} }
IntCC::UnsignedGreaterThanOrEqual | IntCC::UnsignedLessThanOrEqual => { IntCC::UnsignedGreaterThanOrEqual | IntCC::UnsignedLessThanOrEqual => {
ctx.emit(Inst::xmm_rm_r(minu(ty), input.clone(), dst)); ctx.emit(Inst::xmm_rm_r(minu(ty), input.clone(), dst, None));
ctx.emit(Inst::xmm_rm_r(eq(ty), input, dst)) ctx.emit(Inst::xmm_rm_r(eq(ty), input, dst, None))
} }
_ => unimplemented!("Unimplemented comparison code for icmp: {}", condcode), _ => unimplemented!("Unimplemented comparison code for icmp: {}", condcode),
} }
@@ -1686,7 +1707,7 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
ctx.emit(Inst::gen_move(dst, lhs, input_ty)); ctx.emit(Inst::gen_move(dst, lhs, input_ty));
// Emit the comparison. // Emit the comparison.
ctx.emit(Inst::xmm_rm_r_imm(op, rhs, dst, imm.encode(), false)); ctx.emit(Inst::xmm_rm_r_imm(op, rhs, dst, imm.encode(), false, None));
} }
} }
@@ -1899,7 +1920,7 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
ty ty
), ),
}; };
ctx.emit(Inst::xmm_rm_r(sse_op, rhs, dst)); ctx.emit(Inst::xmm_rm_r(sse_op, rhs, dst, None));
} }
Opcode::Fmin | Opcode::Fmax => { Opcode::Fmin | Opcode::Fmax => {
@@ -1988,15 +2009,15 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
ctx.emit(Inst::xmm_mov(mov_op, RegMem::reg(lhs), tmp_xmm1, None)); ctx.emit(Inst::xmm_mov(mov_op, RegMem::reg(lhs), tmp_xmm1, None));
// Perform min in reverse direction // Perform min in reverse direction
ctx.emit(Inst::xmm_rm_r(min_op, RegMem::from(dst), tmp_xmm1)); ctx.emit(Inst::xmm_rm_r(min_op, RegMem::from(dst), tmp_xmm1, None));
// Perform min in original direction // Perform min in original direction
ctx.emit(Inst::xmm_rm_r(min_op, RegMem::reg(lhs), dst)); ctx.emit(Inst::xmm_rm_r(min_op, RegMem::reg(lhs), dst, None));
// X64 handles propagation of -0's and Nans differently between left and right // X64 handles propagation of -0's and Nans differently between left and right
// operands. After doing the min in both directions, this OR will // operands. After doing the min in both directions, this OR will
// guarrentee capture of -0's and Nan in our tmp register // guarrentee capture of -0's and Nan in our tmp register
ctx.emit(Inst::xmm_rm_r(or_op, RegMem::from(dst), tmp_xmm1)); ctx.emit(Inst::xmm_rm_r(or_op, RegMem::from(dst), tmp_xmm1, None));
// Compare unordered to create mask for lanes containing NaNs and then use // Compare unordered to create mask for lanes containing NaNs and then use
// that mask to saturate the NaN containing lanes in the tmp register with 1s. // that mask to saturate the NaN containing lanes in the tmp register with 1s.
@@ -2009,8 +2030,14 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
dst, dst,
cond.encode(), cond.encode(),
false, false,
None,
));
ctx.emit(Inst::xmm_rm_r(
or_op,
RegMem::reg(dst.to_reg()),
tmp_xmm1,
None,
)); ));
ctx.emit(Inst::xmm_rm_r(or_op, RegMem::reg(dst.to_reg()), tmp_xmm1));
// The dst register holds a mask for lanes containing NaNs. // The dst register holds a mask for lanes containing NaNs.
// We take that mask and shift in preparation for creating a different mask // We take that mask and shift in preparation for creating a different mask
@@ -2022,7 +2049,12 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
// Finally we do a nand with the tmp register to produce the final results // Finally we do a nand with the tmp register to produce the final results
// in the dst. // in the dst.
ctx.emit(Inst::xmm_rm_r(andn_op, RegMem::reg(tmp_xmm1.to_reg()), dst)); ctx.emit(Inst::xmm_rm_r(
andn_op,
RegMem::reg(tmp_xmm1.to_reg()),
dst,
None,
));
} else { } else {
let ( let (
mov_op, mov_op,
@@ -2065,23 +2097,43 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
ctx.emit(Inst::xmm_mov(mov_op, RegMem::reg(lhs), tmp_xmm1, None)); ctx.emit(Inst::xmm_mov(mov_op, RegMem::reg(lhs), tmp_xmm1, None));
// Perform max in reverse direction. // Perform max in reverse direction.
ctx.emit(Inst::xmm_rm_r(max_op, RegMem::reg(dst.to_reg()), tmp_xmm1)); ctx.emit(Inst::xmm_rm_r(
max_op,
RegMem::reg(dst.to_reg()),
tmp_xmm1,
None,
));
// Perform max in original direction. // Perform max in original direction.
ctx.emit(Inst::xmm_rm_r(max_op, RegMem::reg(lhs), dst)); ctx.emit(Inst::xmm_rm_r(max_op, RegMem::reg(lhs), dst, None));
// Get the difference between the two results and store in tmp. // Get the difference between the two results and store in tmp.
// Max uses a different approach than min to account for potential // Max uses a different approach than min to account for potential
// discrepancies with plus/minus 0. // discrepancies with plus/minus 0.
ctx.emit(Inst::xmm_rm_r(xor_op, RegMem::reg(tmp_xmm1.to_reg()), dst)); ctx.emit(Inst::xmm_rm_r(
xor_op,
RegMem::reg(tmp_xmm1.to_reg()),
dst,
None,
));
// X64 handles propagation of -0's and Nans differently between left and right // X64 handles propagation of -0's and Nans differently between left and right
// operands. After doing the max in both directions, this OR will // operands. After doing the max in both directions, this OR will
// guarentee capture of 0's and Nan in our tmp register. // guarentee capture of 0's and Nan in our tmp register.
ctx.emit(Inst::xmm_rm_r(or_op, RegMem::reg(dst.to_reg()), tmp_xmm1)); ctx.emit(Inst::xmm_rm_r(
or_op,
RegMem::reg(dst.to_reg()),
tmp_xmm1,
None,
));
// Capture NaNs and sign discrepancies. // Capture NaNs and sign discrepancies.
ctx.emit(Inst::xmm_rm_r(sub_op, RegMem::reg(dst.to_reg()), tmp_xmm1)); ctx.emit(Inst::xmm_rm_r(
sub_op,
RegMem::reg(dst.to_reg()),
tmp_xmm1,
None,
));
// Compare unordered to create mask for lanes containing NaNs and then use // Compare unordered to create mask for lanes containing NaNs and then use
// that mask to saturate the NaN containing lanes in the tmp register with 1s. // that mask to saturate the NaN containing lanes in the tmp register with 1s.
@@ -2092,6 +2144,7 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
dst, dst,
cond.encode(), cond.encode(),
false, false,
None,
)); ));
// The dst register holds a mask for lanes containing NaNs. // The dst register holds a mask for lanes containing NaNs.
@@ -2104,7 +2157,12 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
// Finally we do a nand with the tmp register to produce the final results // Finally we do a nand with the tmp register to produce the final results
// in the dst. // in the dst.
ctx.emit(Inst::xmm_rm_r(andn_op, RegMem::reg(tmp_xmm1.to_reg()), dst)); ctx.emit(Inst::xmm_rm_r(
andn_op,
RegMem::reg(tmp_xmm1.to_reg()),
dst,
None,
));
} }
} }
} }
@@ -2327,7 +2385,7 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
ctx.emit(inst); ctx.emit(inst);
} }
ctx.emit(Inst::xmm_rm_r(opcode, src, dst)); ctx.emit(Inst::xmm_rm_r(opcode, src, dst, None));
} else { } else {
// Eventually vector constants should be available in `gen_constant` and this block // Eventually vector constants should be available in `gen_constant` and this block
// can be merged with the one above (TODO). // can be merged with the one above (TODO).
@@ -2348,6 +2406,7 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
tmp, tmp,
cond.encode(), cond.encode(),
false, false,
None,
); );
ctx.emit(cmpps); ctx.emit(cmpps);
@@ -2367,7 +2426,7 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
ctx.emit(shift); ctx.emit(shift);
// Apply shifted mask (XOR or AND). // Apply shifted mask (XOR or AND).
let mask = Inst::xmm_rm_r(opcode, RegMem::reg(tmp.to_reg()), dst); let mask = Inst::xmm_rm_r(opcode, RegMem::reg(tmp.to_reg()), dst, None);
ctx.emit(mask); ctx.emit(mask);
} else { } else {
panic!("unexpected type {:?} for Fabs", output_ty); panic!("unexpected type {:?} for Fabs", output_ty);
@@ -2426,14 +2485,20 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
dst, dst,
None, None,
)); ));
ctx.emit(Inst::xmm_rm_r(and_not_op, RegMem::reg(lhs), dst)); ctx.emit(Inst::xmm_rm_r(and_not_op, RegMem::reg(lhs), dst, None));
ctx.emit(Inst::xmm_mov(mov_op, RegMem::reg(rhs), tmp_xmm2, None)); ctx.emit(Inst::xmm_mov(mov_op, RegMem::reg(rhs), tmp_xmm2, None));
ctx.emit(Inst::xmm_rm_r( ctx.emit(Inst::xmm_rm_r(
and_op, and_op,
RegMem::reg(tmp_xmm1.to_reg()), RegMem::reg(tmp_xmm1.to_reg()),
tmp_xmm2, tmp_xmm2,
None,
));
ctx.emit(Inst::xmm_rm_r(
or_op,
RegMem::reg(tmp_xmm2.to_reg()),
dst,
None,
)); ));
ctx.emit(Inst::xmm_rm_r(or_op, RegMem::reg(tmp_xmm2.to_reg()), dst));
} }
Opcode::Ceil | Opcode::Floor | Opcode::Nearest | Opcode::Trunc => { Opcode::Ceil | Opcode::Floor | Opcode::Nearest | Opcode::Trunc => {
@@ -3154,7 +3219,12 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
// After loading the constructed mask in a temporary register, we use this to // After loading the constructed mask in a temporary register, we use this to
// shuffle the `dst` register (remember that, in this case, it is the same as // shuffle the `dst` register (remember that, in this case, it is the same as
// `src` so we disregard this register). // `src` so we disregard this register).
ctx.emit(Inst::xmm_rm_r(SseOpcode::Pshufb, RegMem::from(tmp), dst)); ctx.emit(Inst::xmm_rm_r(
SseOpcode::Pshufb,
RegMem::from(tmp),
dst,
None,
));
} else { } else {
// If `lhs` and `rhs` are different, we must shuffle each separately and then OR // If `lhs` and `rhs` are different, we must shuffle each separately and then OR
// them together. This is necessary due to PSHUFB semantics. As in the case above, // them together. This is necessary due to PSHUFB semantics. As in the case above,
@@ -3166,7 +3236,12 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
let constructed_mask = mask.iter().cloned().map(zero_unknown_lane_index).collect(); let constructed_mask = mask.iter().cloned().map(zero_unknown_lane_index).collect();
let tmp1 = ctx.alloc_tmp(RegClass::V128, types::I8X16); let tmp1 = ctx.alloc_tmp(RegClass::V128, types::I8X16);
ctx.emit(Inst::xmm_load_const_seq(constructed_mask, tmp1, ty)); ctx.emit(Inst::xmm_load_const_seq(constructed_mask, tmp1, ty));
ctx.emit(Inst::xmm_rm_r(SseOpcode::Pshufb, RegMem::from(tmp1), tmp0)); ctx.emit(Inst::xmm_rm_r(
SseOpcode::Pshufb,
RegMem::from(tmp1),
tmp0,
None,
));
// PSHUFB the second argument, placing zeroes for unused lanes. // PSHUFB the second argument, placing zeroes for unused lanes.
let constructed_mask = mask let constructed_mask = mask
@@ -3176,11 +3251,21 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
.collect(); .collect();
let tmp2 = ctx.alloc_tmp(RegClass::V128, types::I8X16); let tmp2 = ctx.alloc_tmp(RegClass::V128, types::I8X16);
ctx.emit(Inst::xmm_load_const_seq(constructed_mask, tmp2, ty)); ctx.emit(Inst::xmm_load_const_seq(constructed_mask, tmp2, ty));
ctx.emit(Inst::xmm_rm_r(SseOpcode::Pshufb, RegMem::from(tmp2), dst)); ctx.emit(Inst::xmm_rm_r(
SseOpcode::Pshufb,
RegMem::from(tmp2),
dst,
None,
));
// OR the shuffled registers (the mechanism and lane-size for OR-ing the registers // OR the shuffled registers (the mechanism and lane-size for OR-ing the registers
// is not important). // is not important).
ctx.emit(Inst::xmm_rm_r(SseOpcode::Orps, RegMem::from(tmp0), dst)); ctx.emit(Inst::xmm_rm_r(
SseOpcode::Orps,
RegMem::from(tmp0),
dst,
None,
));
// TODO when AVX512 is enabled we should replace this sequence with a single VPERMB // TODO when AVX512 is enabled we should replace this sequence with a single VPERMB
} }
@@ -3214,6 +3299,7 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
SseOpcode::Paddusb, SseOpcode::Paddusb,
RegMem::from(zero_mask), RegMem::from(zero_mask),
swizzle_mask, swizzle_mask,
None,
)); ));
// Shuffle `dst` using the fixed-up `swizzle_mask`. // Shuffle `dst` using the fixed-up `swizzle_mask`.
@@ -3221,6 +3307,7 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
SseOpcode::Pshufb, SseOpcode::Pshufb,
RegMem::from(swizzle_mask), RegMem::from(swizzle_mask),
dst, dst,
None,
)); ));
} }
@@ -3240,7 +3327,7 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
debug_assert!(lane < ty.lane_count() as u8); debug_assert!(lane < ty.lane_count() as u8);
ctx.emit(Inst::gen_move(dst, in_vec, ty)); ctx.emit(Inst::gen_move(dst, in_vec, ty));
emit_insert_lane(ctx, src, dst, lane, ty.lane_type()); emit_insert_lane(ctx, src, dst, lane, ty.lane_type(), None);
} }
Opcode::Extractlane => { Opcode::Extractlane => {
@@ -3266,7 +3353,7 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
_ => panic!("Unable to extractlane for lane size: {}", ty.lane_bits()), _ => panic!("Unable to extractlane for lane size: {}", ty.lane_bits()),
}; };
let src = RegMem::reg(src); let src = RegMem::reg(src);
ctx.emit(Inst::xmm_rm_r_imm(sse_op, src, dst, lane, w_bit)); ctx.emit(Inst::xmm_rm_r_imm(sse_op, src, dst, lane, w_bit, None));
} else { } else {
if lane == 0 { if lane == 0 {
// Remove the extractlane instruction, leaving the float where it is. The upper // Remove the extractlane instruction, leaving the float where it is. The upper
@@ -3288,7 +3375,7 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
_ => unreachable!(), _ => unreachable!(),
}; };
let src = RegMem::reg(src); let src = RegMem::reg(src);
ctx.emit(Inst::xmm_rm_r_imm(sse_op, src, dst, mask, false)); ctx.emit(Inst::xmm_rm_r_imm(sse_op, src, dst, mask, false, None));
} }
} }
} }
@@ -3307,16 +3394,26 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
ctx.emit(Inst::xmm_uninit_value(dst)); ctx.emit(Inst::xmm_uninit_value(dst));
match ty.lane_bits() { match ty.lane_bits() {
8 => { 8 => {
emit_insert_lane(ctx, src, dst, 0, ty.lane_type()); emit_insert_lane(ctx, src, dst, 0, ty.lane_type(), srcloc);
// Initialize a register with all 0s. // Initialize a register with all 0s.
let tmp = ctx.alloc_tmp(RegClass::V128, ty); let tmp = ctx.alloc_tmp(RegClass::V128, ty);
ctx.emit(Inst::xmm_rm_r(SseOpcode::Pxor, RegMem::from(tmp), tmp)); ctx.emit(Inst::xmm_rm_r(
SseOpcode::Pxor,
RegMem::from(tmp),
tmp,
srcloc,
));
// Shuffle the lowest byte lane to all other lanes. // Shuffle the lowest byte lane to all other lanes.
ctx.emit(Inst::xmm_rm_r(SseOpcode::Pshufb, RegMem::from(tmp), dst)) ctx.emit(Inst::xmm_rm_r(
SseOpcode::Pshufb,
RegMem::from(tmp),
dst,
srcloc,
))
} }
16 => { 16 => {
emit_insert_lane(ctx, src.clone(), dst, 0, ty.lane_type()); emit_insert_lane(ctx, src.clone(), dst, 0, ty.lane_type(), srcloc);
emit_insert_lane(ctx, src, dst, 1, ty.lane_type()); emit_insert_lane(ctx, src, dst, 1, ty.lane_type(), srcloc);
// Shuffle the lowest two lanes to all other lanes. // Shuffle the lowest two lanes to all other lanes.
ctx.emit(Inst::xmm_rm_r_imm( ctx.emit(Inst::xmm_rm_r_imm(
SseOpcode::Pshufd, SseOpcode::Pshufd,
@@ -3324,10 +3421,11 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
dst, dst,
0, 0,
false, false,
srcloc,
)) ))
} }
32 => { 32 => {
emit_insert_lane(ctx, src, dst, 0, ty.lane_type()); emit_insert_lane(ctx, src, dst, 0, ty.lane_type(), srcloc);
// Shuffle the lowest lane to all other lanes. // Shuffle the lowest lane to all other lanes.
ctx.emit(Inst::xmm_rm_r_imm( ctx.emit(Inst::xmm_rm_r_imm(
SseOpcode::Pshufd, SseOpcode::Pshufd,
@@ -3335,11 +3433,12 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
dst, dst,
0, 0,
false, false,
srcloc,
)) ))
} }
64 => { 64 => {
emit_insert_lane(ctx, src.clone(), dst, 0, ty.lane_type()); emit_insert_lane(ctx, src.clone(), dst, 0, ty.lane_type(), srcloc);
emit_insert_lane(ctx, src, dst, 1, ty.lane_type()); emit_insert_lane(ctx, src, dst, 1, ty.lane_type(), srcloc);
} }
_ => panic!("Invalid type to splat: {}", ty), _ => panic!("Invalid type to splat: {}", ty),
} }
@@ -3373,9 +3472,14 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
// Initialize a register with all 0s. // Initialize a register with all 0s.
let tmp = ctx.alloc_tmp(RegClass::V128, ty); let tmp = ctx.alloc_tmp(RegClass::V128, ty);
ctx.emit(Inst::xmm_rm_r(SseOpcode::Pxor, RegMem::from(tmp), tmp)); ctx.emit(Inst::xmm_rm_r(
SseOpcode::Pxor,
RegMem::from(tmp),
tmp,
None,
));
// Compare to see what lanes are filled with all 1s. // Compare to see what lanes are filled with all 1s.
ctx.emit(Inst::xmm_rm_r(eq(src_ty), src, tmp)); ctx.emit(Inst::xmm_rm_r(eq(src_ty), src, tmp, None));
// Set the ZF if the result is all zeroes. // Set the ZF if the result is all zeroes.
ctx.emit(Inst::xmm_cmp_rm_r( ctx.emit(Inst::xmm_cmp_rm_r(
SseOpcode::Ptest, SseOpcode::Ptest,