This patch fills in the missing pieces needed to support wasm atomics on newBE/x64.

It does this by providing an implementation of the CLIF instructions `AtomicRmw`, `AtomicCas`,
`AtomicLoad`, `AtomicStore` and `Fence`.

The translation is straightforward.  `AtomicCas` is translated into x64 `cmpxchg`, `AtomicLoad`
becomes a normal load because x64-TSO provides adequate sequencing, `AtomicStore` becomes a
normal store followed by `mfence`, and `Fence` becomes `mfence`.  `AtomicRmw` is the only
complex case: it becomes a normal load, followed by a loop which computes an updated value,
tries to `cmpxchg` it back to memory, and repeats if necessary.

This is a minimum-effort initial implementation.  `AtomicRmw` could be implemented more
efficiently using LOCK-prefixed integer read-modify-write instructions in the case where the old
value in memory is not required.  Subsequent work could add that, if required.

The x64 emitter has been updated to emit the new instructions, obviously.  The `LegacyPrefix`
mechanism has been revised to handle multiple prefix bytes, not just one, since it is now
sometimes necessary to emit both 0x66 (Operand Size Override) and F0 (Lock).

In the aarch64 implementation of atomics, there has been some minor renaming for the sake of
clarity, and for consistency with this x64 implementation.
This commit is contained in:
Julian Seward
2020-08-20 07:36:19 +02:00
committed by julian-seward1
parent ec87aee147
commit 620e4b4e82
13 changed files with 761 additions and 192 deletions

View File

@@ -3,7 +3,7 @@ use crate::ir::immediates::{Ieee32, Ieee64};
use crate::ir::TrapCode;
use crate::isa::x64::inst::args::*;
use crate::isa::x64::inst::*;
use crate::machinst::{MachBuffer, MachInstEmit, MachLabel};
use crate::machinst::{inst_common, MachBuffer, MachInstEmit, MachLabel};
use core::convert::TryInto;
use log::debug;
use regalloc::{Reg, RegClass, Writable};
@@ -118,25 +118,38 @@ impl RexFlags {
}
}
/// For specifying the legacy prefixes (or `None` if no prefix required) to
/// be used at the start an instruction. A given prefix may be required for
/// various operations, including instructions that operate on GPR, SSE, and Vex
/// registers.
enum LegacyPrefix {
/// We may need to include one or more legacy prefix bytes before the REX prefix. This enum
/// covers only the small set of possibilities that we actually need.
enum LegacyPrefixes {
/// No prefix bytes
None,
/// Operand Size Override -- here, denoting "16-bit operation"
_66,
/// The Lock prefix
_F0,
/// Operand size override and Lock
_66F0,
/// REPNE, but no specific meaning here -- is just an opcode extension
_F2,
/// REP/REPE, but no specific meaning here -- is just an opcode extension
_F3,
}
impl LegacyPrefix {
impl LegacyPrefixes {
#[inline(always)]
fn emit(&self, sink: &mut MachBuffer<Inst>) {
match self {
LegacyPrefix::_66 => sink.put1(0x66),
LegacyPrefix::_F2 => sink.put1(0xF2),
LegacyPrefix::_F3 => sink.put1(0xF3),
LegacyPrefix::None => (),
LegacyPrefixes::_66 => sink.put1(0x66),
LegacyPrefixes::_F0 => sink.put1(0xF0),
LegacyPrefixes::_66F0 => {
// I don't think the order matters, but in any case, this is the same order that
// the GNU assembler uses.
sink.put1(0x66);
sink.put1(0xF0);
}
LegacyPrefixes::_F2 => sink.put1(0xF2),
LegacyPrefixes::_F3 => sink.put1(0xF3),
LegacyPrefixes::None => (),
}
}
}
@@ -145,15 +158,16 @@ impl LegacyPrefix {
///
/// For an instruction that has as operands a reg encoding `enc_g` and a memory address `mem_e`,
/// create and emit:
/// - first the REX prefix,
/// - first the legacy prefixes, if any
/// - then the REX prefix, if needed
/// - then caller-supplied opcode byte(s) (`opcodes` and `num_opcodes`),
/// - then the MOD/RM byte,
/// - then optionally, a SIB byte,
/// - and finally optionally an immediate that will be derived from the `mem_e` operand.
///
/// For most instructions up to and including SSE4.2, that will be the whole instruction: this is
/// what we call "standard" instructions, and abbreviate "std" in the name here. VEX instructions
/// will require their own emitter functions.
/// what we call "standard" instructions, and abbreviate "std" in the name here. VEX-prefixed
/// instructions will require their own emitter functions.
///
/// This will also work for 32-bits x86 instructions, assuming no REX prefix is provided.
///
@@ -168,7 +182,7 @@ impl LegacyPrefix {
/// indicate a 64-bit operation.
fn emit_std_enc_mem(
sink: &mut MachBuffer<Inst>,
prefix: LegacyPrefix,
prefixes: LegacyPrefixes,
opcodes: u32,
mut num_opcodes: usize,
enc_g: u8,
@@ -179,7 +193,7 @@ fn emit_std_enc_mem(
// 64-bit integer registers, because they are part of an address
// expression. But `enc_g` can be derived from a register of any class.
prefix.emit(sink);
prefixes.emit(sink);
match mem_e {
Amode::ImmReg { simm32, base } => {
@@ -304,7 +318,7 @@ fn emit_std_enc_mem(
/// operand is a register rather than memory. Hence it is much simpler.
fn emit_std_enc_enc(
sink: &mut MachBuffer<Inst>,
prefix: LegacyPrefix,
prefixes: LegacyPrefixes,
opcodes: u32,
mut num_opcodes: usize,
enc_g: u8,
@@ -316,8 +330,8 @@ fn emit_std_enc_enc(
// integer-to-FP conversion insn, one might be RegClass::I64 and the other
// RegClass::V128.
// The operand-size override.
prefix.emit(sink);
// The legacy prefixes.
prefixes.emit(sink);
// The rex byte.
rex.emit_two_op(sink, enc_g, enc_e);
@@ -338,7 +352,7 @@ fn emit_std_enc_enc(
fn emit_std_reg_mem(
sink: &mut MachBuffer<Inst>,
prefix: LegacyPrefix,
prefixes: LegacyPrefixes,
opcodes: u32,
num_opcodes: usize,
reg_g: Reg,
@@ -346,12 +360,12 @@ fn emit_std_reg_mem(
rex: RexFlags,
) {
let enc_g = reg_enc(reg_g);
emit_std_enc_mem(sink, prefix, opcodes, num_opcodes, enc_g, mem_e, rex);
emit_std_enc_mem(sink, prefixes, opcodes, num_opcodes, enc_g, mem_e, rex);
}
fn emit_std_reg_reg(
sink: &mut MachBuffer<Inst>,
prefix: LegacyPrefix,
prefixes: LegacyPrefixes,
opcodes: u32,
num_opcodes: usize,
reg_g: Reg,
@@ -360,7 +374,7 @@ fn emit_std_reg_reg(
) {
let enc_g = reg_enc(reg_g);
let enc_e = reg_enc(reg_e);
emit_std_enc_enc(sink, prefix, opcodes, num_opcodes, enc_g, enc_e, rex);
emit_std_enc_enc(sink, prefixes, opcodes, num_opcodes, enc_g, enc_e, rex);
}
/// Write a suitable number of bits from an imm64 to the sink.
@@ -481,7 +495,7 @@ pub(crate) fn emit(
RegMemImm::Reg { reg: reg_e } => {
emit_std_reg_reg(
sink,
LegacyPrefix::None,
LegacyPrefixes::None,
0x0FAF,
2,
reg_g.to_reg(),
@@ -493,7 +507,7 @@ pub(crate) fn emit(
RegMemImm::Mem { addr } => {
emit_std_reg_mem(
sink,
LegacyPrefix::None,
LegacyPrefixes::None,
0x0FAF,
2,
reg_g.to_reg(),
@@ -508,7 +522,7 @@ pub(crate) fn emit(
// Yes, really, reg_g twice.
emit_std_reg_reg(
sink,
LegacyPrefix::None,
LegacyPrefixes::None,
opcode,
1,
reg_g.to_reg(),
@@ -535,7 +549,7 @@ pub(crate) fn emit(
// code easily.
emit_std_reg_reg(
sink,
LegacyPrefix::None,
LegacyPrefixes::None,
opcode_r,
1,
*reg_e,
@@ -550,7 +564,7 @@ pub(crate) fn emit(
// Here we revert to the "normal" G-E ordering.
emit_std_reg_mem(
sink,
LegacyPrefix::None,
LegacyPrefixes::None,
opcode_m,
1,
reg_g.to_reg(),
@@ -566,7 +580,7 @@ pub(crate) fn emit(
let enc_g = int_reg_enc(reg_g.to_reg());
emit_std_enc_enc(
sink,
LegacyPrefix::None,
LegacyPrefixes::None,
opcode,
1,
subopcode_i,
@@ -581,9 +595,9 @@ pub(crate) fn emit(
Inst::UnaryRmR { size, op, src, dst } => {
let (prefix, rex_flags) = match size {
2 => (LegacyPrefix::_66, RexFlags::clear_w()),
4 => (LegacyPrefix::None, RexFlags::clear_w()),
8 => (LegacyPrefix::None, RexFlags::set_w()),
2 => (LegacyPrefixes::_66, RexFlags::clear_w()),
4 => (LegacyPrefixes::None, RexFlags::clear_w()),
8 => (LegacyPrefixes::None, RexFlags::set_w()),
_ => unreachable!(),
};
@@ -621,9 +635,9 @@ pub(crate) fn emit(
loc,
} => {
let (prefix, rex_flags) = match size {
2 => (LegacyPrefix::_66, RexFlags::clear_w()),
4 => (LegacyPrefix::None, RexFlags::clear_w()),
8 => (LegacyPrefix::None, RexFlags::set_w()),
2 => (LegacyPrefixes::_66, RexFlags::clear_w()),
4 => (LegacyPrefixes::None, RexFlags::clear_w()),
8 => (LegacyPrefixes::None, RexFlags::set_w()),
_ => unreachable!(),
};
@@ -649,9 +663,9 @@ pub(crate) fn emit(
Inst::MulHi { size, signed, rhs } => {
let (prefix, rex_flags) = match size {
2 => (LegacyPrefix::_66, RexFlags::clear_w()),
4 => (LegacyPrefix::None, RexFlags::clear_w()),
8 => (LegacyPrefix::None, RexFlags::set_w()),
2 => (LegacyPrefixes::_66, RexFlags::clear_w()),
4 => (LegacyPrefixes::None, RexFlags::clear_w()),
8 => (LegacyPrefixes::None, RexFlags::set_w()),
_ => unreachable!(),
};
@@ -826,7 +840,7 @@ pub(crate) fn emit(
} else {
RexFlags::clear_w()
};
emit_std_reg_reg(sink, LegacyPrefix::None, 0x89, 1, *src, dst.to_reg(), rex);
emit_std_reg_reg(sink, LegacyPrefixes::None, 0x89, 1, *src, dst.to_reg(), rex);
}
Inst::MovZX_RM_R {
@@ -880,7 +894,7 @@ pub(crate) fn emit(
}
emit_std_reg_reg(
sink,
LegacyPrefix::None,
LegacyPrefixes::None,
opcodes,
num_opcodes,
dst.to_reg(),
@@ -899,7 +913,7 @@ pub(crate) fn emit(
emit_std_reg_mem(
sink,
LegacyPrefix::None,
LegacyPrefixes::None,
opcodes,
num_opcodes,
dst.to_reg(),
@@ -920,7 +934,7 @@ pub(crate) fn emit(
emit_std_reg_mem(
sink,
LegacyPrefix::None,
LegacyPrefixes::None,
0x8B,
1,
dst.to_reg(),
@@ -931,7 +945,7 @@ pub(crate) fn emit(
Inst::LoadEffectiveAddress { addr, dst } => emit_std_reg_mem(
sink,
LegacyPrefix::None,
LegacyPrefixes::None,
0x8D,
1,
dst.to_reg(),
@@ -982,7 +996,7 @@ pub(crate) fn emit(
}
emit_std_reg_reg(
sink,
LegacyPrefix::None,
LegacyPrefixes::None,
opcodes,
num_opcodes,
dst.to_reg(),
@@ -1001,7 +1015,7 @@ pub(crate) fn emit(
emit_std_reg_mem(
sink,
LegacyPrefix::None,
LegacyPrefixes::None,
opcodes,
num_opcodes,
dst.to_reg(),
@@ -1038,14 +1052,14 @@ pub(crate) fn emit(
};
// MOV r8, r/m8 is (REX.W==0) 88 /r
emit_std_reg_mem(sink, LegacyPrefix::None, 0x88, 1, *src, dst, rex)
emit_std_reg_mem(sink, LegacyPrefixes::None, 0x88, 1, *src, dst, rex)
}
2 => {
// MOV r16, r/m16 is 66 (REX.W==0) 89 /r
emit_std_reg_mem(
sink,
LegacyPrefix::_66,
LegacyPrefixes::_66,
0x89,
1,
*src,
@@ -1058,7 +1072,7 @@ pub(crate) fn emit(
// MOV r32, r/m32 is (REX.W==0) 89 /r
emit_std_reg_mem(
sink,
LegacyPrefix::None,
LegacyPrefixes::None,
0x89,
1,
*src,
@@ -1071,7 +1085,7 @@ pub(crate) fn emit(
// MOV r64, r/m64 is (REX.W==1) 89 /r
emit_std_reg_mem(
sink,
LegacyPrefix::None,
LegacyPrefixes::None,
0x89,
1,
*src,
@@ -1109,7 +1123,7 @@ pub(crate) fn emit(
None => {
// SHL/SHR/SAR %cl, reg32 is (REX.W==0) D3 /subopcode
// SHL/SHR/SAR %cl, reg64 is (REX.W==1) D3 /subopcode
emit_std_enc_enc(sink, LegacyPrefix::None, 0xD3, 1, subopcode, enc_dst, rex);
emit_std_enc_enc(sink, LegacyPrefixes::None, 0xD3, 1, subopcode, enc_dst, rex);
}
Some(num_bits) => {
@@ -1117,7 +1131,7 @@ pub(crate) fn emit(
// SHL/SHR/SAR $ib, reg64 is (REX.W==1) C1 /subopcode ib
// When the shift amount is 1, there's an even shorter encoding, but we don't
// bother with that nicety here.
emit_std_enc_enc(sink, LegacyPrefix::None, 0xC1, 1, subopcode, enc_dst, rex);
emit_std_enc_enc(sink, LegacyPrefixes::None, 0xC1, 1, subopcode, enc_dst, rex);
sink.put1(*num_bits);
}
}
@@ -1125,7 +1139,7 @@ pub(crate) fn emit(
Inst::XmmRmiReg { opcode, src, dst } => {
let rex = RexFlags::clear_w();
let prefix = LegacyPrefix::_66;
let prefix = LegacyPrefixes::_66;
if let RegMemImm::Imm { simm32 } = src {
let (opcode_bytes, reg_digit) = match opcode {
SseOpcode::Psllw => (0x0F71, 6),
@@ -1175,9 +1189,9 @@ pub(crate) fn emit(
src: src_e,
dst: reg_g,
} => {
let mut prefix = LegacyPrefix::None;
let mut prefix = LegacyPrefixes::None;
if *size == 2 {
prefix = LegacyPrefix::_66;
prefix = LegacyPrefixes::_66;
}
let mut rex = match size {
@@ -1245,7 +1259,7 @@ pub(crate) fn emit(
rex_flags.always_emit();
emit_std_enc_enc(
sink,
LegacyPrefix::None,
LegacyPrefixes::None,
opcode,
2,
0,
@@ -1261,9 +1275,9 @@ pub(crate) fn emit(
dst: reg_g,
} => {
let (prefix, rex_flags) = match size {
2 => (LegacyPrefix::_66, RexFlags::clear_w()),
4 => (LegacyPrefix::None, RexFlags::clear_w()),
8 => (LegacyPrefix::None, RexFlags::set_w()),
2 => (LegacyPrefixes::_66, RexFlags::clear_w()),
4 => (LegacyPrefixes::None, RexFlags::clear_w()),
8 => (LegacyPrefixes::None, RexFlags::set_w()),
_ => unreachable!("invalid size spec for cmove"),
};
let opcode = 0x0F40 + cc.get_enc() as u32;
@@ -1315,7 +1329,7 @@ pub(crate) fn emit(
let addr = &addr.finalize(state);
emit_std_enc_mem(
sink,
LegacyPrefix::None,
LegacyPrefixes::None,
0xFF,
1,
6, /*subopcode*/
@@ -1371,7 +1385,7 @@ pub(crate) fn emit(
let reg_enc = int_reg_enc(*reg);
emit_std_enc_enc(
sink,
LegacyPrefix::None,
LegacyPrefixes::None,
0xFF,
1,
2, /*subopcode*/
@@ -1384,7 +1398,7 @@ pub(crate) fn emit(
let addr = &addr.finalize(state);
emit_std_enc_mem(
sink,
LegacyPrefix::None,
LegacyPrefixes::None,
0xFF,
1,
2, /*subopcode*/
@@ -1461,7 +1475,7 @@ pub(crate) fn emit(
let reg_enc = int_reg_enc(*reg);
emit_std_enc_enc(
sink,
LegacyPrefix::None,
LegacyPrefixes::None,
0xFF,
1,
4, /*subopcode*/
@@ -1474,7 +1488,7 @@ pub(crate) fn emit(
let addr = &addr.finalize(state);
emit_std_enc_mem(
sink,
LegacyPrefix::None,
LegacyPrefixes::None,
0xFF,
1,
4, /*subopcode*/
@@ -1596,20 +1610,20 @@ pub(crate) fn emit(
let rex = RexFlags::clear_w();
let (prefix, opcode) = match op {
SseOpcode::Cvtss2sd => (LegacyPrefix::_F3, 0x0F5A),
SseOpcode::Cvtsd2ss => (LegacyPrefix::_F2, 0x0F5A),
SseOpcode::Movaps => (LegacyPrefix::None, 0x0F28),
SseOpcode::Movapd => (LegacyPrefix::_66, 0x0F28),
SseOpcode::Movdqa => (LegacyPrefix::_66, 0x0F6F),
SseOpcode::Movdqu => (LegacyPrefix::_F3, 0x0F6F),
SseOpcode::Movsd => (LegacyPrefix::_F2, 0x0F10),
SseOpcode::Movss => (LegacyPrefix::_F3, 0x0F10),
SseOpcode::Movups => (LegacyPrefix::None, 0x0F10),
SseOpcode::Movupd => (LegacyPrefix::_66, 0x0F10),
SseOpcode::Sqrtps => (LegacyPrefix::None, 0x0F51),
SseOpcode::Sqrtpd => (LegacyPrefix::_66, 0x0F51),
SseOpcode::Sqrtss => (LegacyPrefix::_F3, 0x0F51),
SseOpcode::Sqrtsd => (LegacyPrefix::_F2, 0x0F51),
SseOpcode::Cvtss2sd => (LegacyPrefixes::_F3, 0x0F5A),
SseOpcode::Cvtsd2ss => (LegacyPrefixes::_F2, 0x0F5A),
SseOpcode::Movaps => (LegacyPrefixes::None, 0x0F28),
SseOpcode::Movapd => (LegacyPrefixes::_66, 0x0F28),
SseOpcode::Movdqa => (LegacyPrefixes::_66, 0x0F6F),
SseOpcode::Movdqu => (LegacyPrefixes::_F3, 0x0F6F),
SseOpcode::Movsd => (LegacyPrefixes::_F2, 0x0F10),
SseOpcode::Movss => (LegacyPrefixes::_F3, 0x0F10),
SseOpcode::Movups => (LegacyPrefixes::None, 0x0F10),
SseOpcode::Movupd => (LegacyPrefixes::_66, 0x0F10),
SseOpcode::Sqrtps => (LegacyPrefixes::None, 0x0F51),
SseOpcode::Sqrtpd => (LegacyPrefixes::_66, 0x0F51),
SseOpcode::Sqrtss => (LegacyPrefixes::_F3, 0x0F51),
SseOpcode::Sqrtsd => (LegacyPrefixes::_F2, 0x0F51),
_ => unimplemented!("Opcode {:?} not implemented", op),
};
@@ -1635,49 +1649,49 @@ pub(crate) fn emit(
} => {
let rex = RexFlags::clear_w();
let (prefix, opcode, length) = match op {
SseOpcode::Addps => (LegacyPrefix::None, 0x0F58, 2),
SseOpcode::Addpd => (LegacyPrefix::_66, 0x0F58, 2),
SseOpcode::Addss => (LegacyPrefix::_F3, 0x0F58, 2),
SseOpcode::Addsd => (LegacyPrefix::_F2, 0x0F58, 2),
SseOpcode::Andpd => (LegacyPrefix::_66, 0x0F54, 2),
SseOpcode::Andps => (LegacyPrefix::None, 0x0F54, 2),
SseOpcode::Andnps => (LegacyPrefix::None, 0x0F55, 2),
SseOpcode::Andnpd => (LegacyPrefix::_66, 0x0F55, 2),
SseOpcode::Divps => (LegacyPrefix::None, 0x0F5E, 2),
SseOpcode::Divpd => (LegacyPrefix::_66, 0x0F5E, 2),
SseOpcode::Divss => (LegacyPrefix::_F3, 0x0F5E, 2),
SseOpcode::Divsd => (LegacyPrefix::_F2, 0x0F5E, 2),
SseOpcode::Minps => (LegacyPrefix::None, 0x0F5D, 2),
SseOpcode::Minpd => (LegacyPrefix::_66, 0x0F5D, 2),
SseOpcode::Minss => (LegacyPrefix::_F3, 0x0F5D, 2),
SseOpcode::Minsd => (LegacyPrefix::_F2, 0x0F5D, 2),
SseOpcode::Maxps => (LegacyPrefix::None, 0x0F5F, 2),
SseOpcode::Maxpd => (LegacyPrefix::_66, 0x0F5F, 2),
SseOpcode::Maxss => (LegacyPrefix::_F3, 0x0F5F, 2),
SseOpcode::Maxsd => (LegacyPrefix::_F2, 0x0F5F, 2),
SseOpcode::Mulps => (LegacyPrefix::None, 0x0F59, 2),
SseOpcode::Mulpd => (LegacyPrefix::_66, 0x0F59, 2),
SseOpcode::Mulss => (LegacyPrefix::_F3, 0x0F59, 2),
SseOpcode::Mulsd => (LegacyPrefix::_F2, 0x0F59, 2),
SseOpcode::Orpd => (LegacyPrefix::_66, 0x0F56, 2),
SseOpcode::Orps => (LegacyPrefix::None, 0x0F56, 2),
SseOpcode::Paddb => (LegacyPrefix::_66, 0x0FFC, 2),
SseOpcode::Paddd => (LegacyPrefix::_66, 0x0FFE, 2),
SseOpcode::Paddq => (LegacyPrefix::_66, 0x0FD4, 2),
SseOpcode::Paddw => (LegacyPrefix::_66, 0x0FFD, 2),
SseOpcode::Pmulld => (LegacyPrefix::_66, 0x0F3840, 3),
SseOpcode::Pmullw => (LegacyPrefix::_66, 0x0FD5, 2),
SseOpcode::Pmuludq => (LegacyPrefix::_66, 0x0FF4, 2),
SseOpcode::Psubb => (LegacyPrefix::_66, 0x0FF8, 2),
SseOpcode::Psubd => (LegacyPrefix::_66, 0x0FFA, 2),
SseOpcode::Psubq => (LegacyPrefix::_66, 0x0FFB, 2),
SseOpcode::Psubw => (LegacyPrefix::_66, 0x0FF9, 2),
SseOpcode::Subps => (LegacyPrefix::None, 0x0F5C, 2),
SseOpcode::Subpd => (LegacyPrefix::_66, 0x0F5C, 2),
SseOpcode::Subss => (LegacyPrefix::_F3, 0x0F5C, 2),
SseOpcode::Subsd => (LegacyPrefix::_F2, 0x0F5C, 2),
SseOpcode::Xorps => (LegacyPrefix::None, 0x0F57, 2),
SseOpcode::Xorpd => (LegacyPrefix::_66, 0x0F57, 2),
SseOpcode::Addps => (LegacyPrefixes::None, 0x0F58, 2),
SseOpcode::Addpd => (LegacyPrefixes::_66, 0x0F58, 2),
SseOpcode::Addss => (LegacyPrefixes::_F3, 0x0F58, 2),
SseOpcode::Addsd => (LegacyPrefixes::_F2, 0x0F58, 2),
SseOpcode::Andpd => (LegacyPrefixes::_66, 0x0F54, 2),
SseOpcode::Andps => (LegacyPrefixes::None, 0x0F54, 2),
SseOpcode::Andnps => (LegacyPrefixes::None, 0x0F55, 2),
SseOpcode::Andnpd => (LegacyPrefixes::_66, 0x0F55, 2),
SseOpcode::Divps => (LegacyPrefixes::None, 0x0F5E, 2),
SseOpcode::Divpd => (LegacyPrefixes::_66, 0x0F5E, 2),
SseOpcode::Divss => (LegacyPrefixes::_F3, 0x0F5E, 2),
SseOpcode::Divsd => (LegacyPrefixes::_F2, 0x0F5E, 2),
SseOpcode::Minps => (LegacyPrefixes::None, 0x0F5D, 2),
SseOpcode::Minpd => (LegacyPrefixes::_66, 0x0F5D, 2),
SseOpcode::Minss => (LegacyPrefixes::_F3, 0x0F5D, 2),
SseOpcode::Minsd => (LegacyPrefixes::_F2, 0x0F5D, 2),
SseOpcode::Maxps => (LegacyPrefixes::None, 0x0F5F, 2),
SseOpcode::Maxpd => (LegacyPrefixes::_66, 0x0F5F, 2),
SseOpcode::Maxss => (LegacyPrefixes::_F3, 0x0F5F, 2),
SseOpcode::Maxsd => (LegacyPrefixes::_F2, 0x0F5F, 2),
SseOpcode::Mulps => (LegacyPrefixes::None, 0x0F59, 2),
SseOpcode::Mulpd => (LegacyPrefixes::_66, 0x0F59, 2),
SseOpcode::Mulss => (LegacyPrefixes::_F3, 0x0F59, 2),
SseOpcode::Mulsd => (LegacyPrefixes::_F2, 0x0F59, 2),
SseOpcode::Orpd => (LegacyPrefixes::_66, 0x0F56, 2),
SseOpcode::Orps => (LegacyPrefixes::None, 0x0F56, 2),
SseOpcode::Paddb => (LegacyPrefixes::_66, 0x0FFC, 2),
SseOpcode::Paddd => (LegacyPrefixes::_66, 0x0FFE, 2),
SseOpcode::Paddq => (LegacyPrefixes::_66, 0x0FD4, 2),
SseOpcode::Paddw => (LegacyPrefixes::_66, 0x0FFD, 2),
SseOpcode::Pmulld => (LegacyPrefixes::_66, 0x0F3840, 3),
SseOpcode::Pmullw => (LegacyPrefixes::_66, 0x0FD5, 2),
SseOpcode::Pmuludq => (LegacyPrefixes::_66, 0x0FF4, 2),
SseOpcode::Psubb => (LegacyPrefixes::_66, 0x0FF8, 2),
SseOpcode::Psubd => (LegacyPrefixes::_66, 0x0FFA, 2),
SseOpcode::Psubq => (LegacyPrefixes::_66, 0x0FFB, 2),
SseOpcode::Psubw => (LegacyPrefixes::_66, 0x0FF9, 2),
SseOpcode::Subps => (LegacyPrefixes::None, 0x0F5C, 2),
SseOpcode::Subpd => (LegacyPrefixes::_66, 0x0F5C, 2),
SseOpcode::Subss => (LegacyPrefixes::_F3, 0x0F5C, 2),
SseOpcode::Subsd => (LegacyPrefixes::_F2, 0x0F5C, 2),
SseOpcode::Xorps => (LegacyPrefixes::None, 0x0F57, 2),
SseOpcode::Xorpd => (LegacyPrefixes::_66, 0x0F57, 2),
_ => unimplemented!("Opcode {:?} not implemented", op),
};
@@ -1780,10 +1794,10 @@ pub(crate) fn emit(
Inst::XmmRmRImm { op, src, dst, imm } => {
let prefix = match op {
SseOpcode::Cmpps => LegacyPrefix::None,
SseOpcode::Cmppd => LegacyPrefix::_66,
SseOpcode::Cmpss => LegacyPrefix::_F3,
SseOpcode::Cmpsd => LegacyPrefix::_F2,
SseOpcode::Cmpps => LegacyPrefixes::None,
SseOpcode::Cmppd => LegacyPrefixes::_66,
SseOpcode::Cmpss => LegacyPrefixes::_F3,
SseOpcode::Cmpsd => LegacyPrefixes::_F2,
_ => unimplemented!("Opcode {:?} not implemented", op),
};
let opcode = 0x0FC2;
@@ -1833,14 +1847,14 @@ pub(crate) fn emit(
srcloc,
} => {
let (prefix, opcode) = match op {
SseOpcode::Movaps => (LegacyPrefix::None, 0x0F29),
SseOpcode::Movapd => (LegacyPrefix::_66, 0x0F29),
SseOpcode::Movdqa => (LegacyPrefix::_66, 0x0F7F),
SseOpcode::Movdqu => (LegacyPrefix::_F3, 0x0F7F),
SseOpcode::Movss => (LegacyPrefix::_F3, 0x0F11),
SseOpcode::Movsd => (LegacyPrefix::_F2, 0x0F11),
SseOpcode::Movups => (LegacyPrefix::None, 0x0F11),
SseOpcode::Movupd => (LegacyPrefix::_66, 0x0F11),
SseOpcode::Movaps => (LegacyPrefixes::None, 0x0F29),
SseOpcode::Movapd => (LegacyPrefixes::_66, 0x0F29),
SseOpcode::Movdqa => (LegacyPrefixes::_66, 0x0F7F),
SseOpcode::Movdqu => (LegacyPrefixes::_F3, 0x0F7F),
SseOpcode::Movss => (LegacyPrefixes::_F3, 0x0F11),
SseOpcode::Movsd => (LegacyPrefixes::_F2, 0x0F11),
SseOpcode::Movups => (LegacyPrefixes::None, 0x0F11),
SseOpcode::Movupd => (LegacyPrefixes::_66, 0x0F11),
_ => unimplemented!("Opcode {:?} not implemented", op),
};
let dst = &dst.finalize(state);
@@ -1860,9 +1874,9 @@ pub(crate) fn emit(
let (prefix, opcode, dst_first) = match op {
// Movd and movq use the same opcode; the presence of the REX prefix (set below)
// actually determines which is used.
SseOpcode::Movd | SseOpcode::Movq => (LegacyPrefix::_66, 0x0F7E, false),
SseOpcode::Cvttss2si => (LegacyPrefix::_F3, 0x0F2C, true),
SseOpcode::Cvttsd2si => (LegacyPrefix::_F2, 0x0F2C, true),
SseOpcode::Movd | SseOpcode::Movq => (LegacyPrefixes::_66, 0x0F7E, false),
SseOpcode::Cvttss2si => (LegacyPrefixes::_F3, 0x0F2C, true),
SseOpcode::Cvttsd2si => (LegacyPrefixes::_F2, 0x0F2C, true),
_ => panic!("unexpected opcode {:?}", op),
};
let rex = match dst_size {
@@ -1888,9 +1902,9 @@ pub(crate) fn emit(
let (prefix, opcode) = match op {
// Movd and movq use the same opcode; the presence of the REX prefix (set below)
// actually determines which is used.
SseOpcode::Movd | SseOpcode::Movq => (LegacyPrefix::_66, 0x0F6E),
SseOpcode::Cvtsi2ss => (LegacyPrefix::_F3, 0x0F2A),
SseOpcode::Cvtsi2sd => (LegacyPrefix::_F2, 0x0F2A),
SseOpcode::Movd | SseOpcode::Movq => (LegacyPrefixes::_66, 0x0F6E),
SseOpcode::Cvtsi2ss => (LegacyPrefixes::_F3, 0x0F2A),
SseOpcode::Cvtsi2sd => (LegacyPrefixes::_F2, 0x0F2A),
_ => panic!("unexpected opcode {:?}", op),
};
let rex = match *src_size {
@@ -1911,8 +1925,8 @@ pub(crate) fn emit(
Inst::XMM_Cmp_RM_R { op, src, dst } => {
let rex = RexFlags::clear_w();
let (prefix, opcode) = match op {
SseOpcode::Ucomisd => (LegacyPrefix::_66, 0x0F2E),
SseOpcode::Ucomiss => (LegacyPrefix::None, 0x0F2E),
SseOpcode::Ucomisd => (LegacyPrefixes::_66, 0x0F2E),
SseOpcode::Ucomiss => (LegacyPrefixes::None, 0x0F2E),
_ => unimplemented!("Emit xmm cmp rm r"),
};
@@ -2431,6 +2445,113 @@ pub(crate) fn emit(
}
}
Inst::LockCmpxchg {
ty,
src,
dst,
srcloc,
} => {
if let Some(srcloc) = srcloc {
sink.add_trap(*srcloc, TrapCode::HeapOutOfBounds);
}
// lock cmpxchg{b,w,l,q} %src, (dst)
// Note that 0xF0 is the Lock prefix.
let (prefix, rex, opcodes) = match *ty {
types::I8 => {
let mut rex_flags = RexFlags::clear_w();
let enc_src = int_reg_enc(*src);
if enc_src >= 4 && enc_src <= 7 {
rex_flags.always_emit();
};
(LegacyPrefixes::_F0, rex_flags, 0x0FB0)
}
types::I16 => (LegacyPrefixes::_66F0, RexFlags::clear_w(), 0x0FB1),
types::I32 => (LegacyPrefixes::_F0, RexFlags::clear_w(), 0x0FB1),
types::I64 => (LegacyPrefixes::_F0, RexFlags::set_w(), 0x0FB1),
_ => unreachable!(),
};
emit_std_reg_mem(sink, prefix, opcodes, 2, *src, &dst.finalize(state), rex);
}
Inst::AtomicRmwSeq { ty, op, srcloc } => {
// Emit this:
//
// mov{zbq,zwq,zlq,q} (%r9), %rax // rax = old value
// again:
// movq %rax, %r11 // rax = old value, r11 = old value
// `op`q %r10, %r11 // rax = old value, r11 = new value
// lock cmpxchg{b,w,l,q} %r11, (%r9) // try to store new value
// jnz again // If this is taken, rax will have a "revised" old value
//
// Operand conventions:
// IN: %r9 (addr), %r10 (2nd arg for `op`)
// OUT: %rax (old value), %r11 (trashed), %rflags (trashed)
//
// In the case where the operation is 'xchg', the "`op`q" instruction is instead
// movq %r10, %r11
// so that we simply write in the destination, the "2nd arg for `op`".
let rax = regs::rax();
let r9 = regs::r9();
let r10 = regs::r10();
let r11 = regs::r11();
let rax_w = Writable::from_reg(rax);
let r11_w = Writable::from_reg(r11);
let amode = Amode::imm_reg(0, r9);
let again_label = sink.get_label();
// mov{zbq,zwq,zlq,q} (%r9), %rax
// No need to call `add_trap` here, since the `i1` emit will do that.
let i1 = Inst::load(*ty, amode.clone(), rax_w, ExtKind::ZeroExtend, *srcloc);
i1.emit(sink, flags, state);
// again:
sink.bind_label(again_label);
// movq %rax, %r11
let i2 = Inst::mov_r_r(true, rax, r11_w);
i2.emit(sink, flags, state);
// opq %r10, %r11
let r10_rmi = RegMemImm::reg(r10);
let i3 = if *op == inst_common::AtomicRmwOp::Xchg {
Inst::mov_r_r(true, r10, r11_w)
} else {
let alu_op = match op {
inst_common::AtomicRmwOp::Add => AluRmiROpcode::Add,
inst_common::AtomicRmwOp::Sub => AluRmiROpcode::Sub,
inst_common::AtomicRmwOp::And => AluRmiROpcode::And,
inst_common::AtomicRmwOp::Or => AluRmiROpcode::Or,
inst_common::AtomicRmwOp::Xor => AluRmiROpcode::Xor,
inst_common::AtomicRmwOp::Xchg => unreachable!(),
};
Inst::alu_rmi_r(true, alu_op, r10_rmi, r11_w)
};
i3.emit(sink, flags, state);
// lock cmpxchg{b,w,l,q} %r11, (%r9)
// No need to call `add_trap` here, since the `i4` emit will do that.
let i4 = Inst::LockCmpxchg {
ty: *ty,
src: r11,
dst: amode.into(),
srcloc: *srcloc,
};
i4.emit(sink, flags, state);
// jnz again
one_way_jmp(sink, CC::NZ, again_label);
}
Inst::Fence { kind } => {
sink.put1(0x0F);
sink.put1(0xAE);
match kind {
FenceKind::MFence => sink.put1(0xF0), // mfence = 0F AE F0
FenceKind::LFence => sink.put1(0xE8), // lfence = 0F AE E8
FenceKind::SFence => sink.put1(0xF8), // sfence = 0F AE F8
}
}
Inst::Hlt => {
sink.put1(0xcc);
}