diff --git a/cranelift/codegen/src/isa/aarch64/inst/args.rs b/cranelift/codegen/src/isa/aarch64/inst/args.rs index 729d21d121..fe8660bbaf 100644 --- a/cranelift/codegen/src/isa/aarch64/inst/args.rs +++ b/cranelift/codegen/src/isa/aarch64/inst/args.rs @@ -3,7 +3,6 @@ // Some variants are never constructed, but we still want them as options in the future. #![allow(dead_code)] -use crate::ir; use crate::ir::types::{F32X2, F32X4, F64X2, I16X4, I16X8, I32X2, I32X4, I64X2, I8X16, I8X8}; use crate::ir::Type; use crate::isa::aarch64::inst::*; @@ -681,30 +680,3 @@ impl VectorSize { } } } - -//============================================================================= -// Instruction sub-components: atomic memory update operations - -#[derive(Clone, Copy, Debug, PartialEq, Eq)] -#[repr(u8)] -pub enum AtomicRMWOp { - Add, - Sub, - And, - Or, - Xor, - Xchg, -} - -impl AtomicRMWOp { - pub fn from(ir_op: ir::AtomicRmwOp) -> Self { - match ir_op { - ir::AtomicRmwOp::Add => AtomicRMWOp::Add, - ir::AtomicRmwOp::Sub => AtomicRMWOp::Sub, - ir::AtomicRmwOp::And => AtomicRMWOp::And, - ir::AtomicRmwOp::Or => AtomicRMWOp::Or, - ir::AtomicRmwOp::Xor => AtomicRMWOp::Xor, - ir::AtomicRmwOp::Xchg => AtomicRMWOp::Xchg, - } - } -} diff --git a/cranelift/codegen/src/isa/aarch64/inst/emit.rs b/cranelift/codegen/src/isa/aarch64/inst/emit.rs index 32fe3aa6cf..60a81eb005 100644 --- a/cranelift/codegen/src/isa/aarch64/inst/emit.rs +++ b/cranelift/codegen/src/isa/aarch64/inst/emit.rs @@ -1090,18 +1090,18 @@ impl MachInstEmit for Inst { } sink.put4(enc_ldxr(ty, x27wr, x25)); // ldxr x27, [x25] - if op == AtomicRMWOp::Xchg { + if op == inst_common::AtomicRmwOp::Xchg { // mov x28, x26 sink.put4(enc_arith_rrr(0b101_01010_00_0, 0b000000, x28wr, xzr, x26)) } else { // add/sub/and/orr/eor x28, x27, x26 let bits_31_21 = match op { - AtomicRMWOp::Add => 0b100_01011_00_0, - AtomicRMWOp::Sub => 0b110_01011_00_0, - AtomicRMWOp::And => 0b100_01010_00_0, - AtomicRMWOp::Or => 0b101_01010_00_0, - AtomicRMWOp::Xor 
=> 0b110_01010_00_0, - AtomicRMWOp::Xchg => unreachable!(), + inst_common::AtomicRmwOp::Add => 0b100_01011_00_0, + inst_common::AtomicRmwOp::Sub => 0b110_01011_00_0, + inst_common::AtomicRmwOp::And => 0b100_01010_00_0, + inst_common::AtomicRmwOp::Or => 0b101_01010_00_0, + inst_common::AtomicRmwOp::Xor => 0b110_01010_00_0, + inst_common::AtomicRmwOp::Xchg => unreachable!(), }; sink.put4(enc_arith_rrr(bits_31_21, 0b000000, x28wr, x27, x26)); } diff --git a/cranelift/codegen/src/isa/aarch64/inst/emit_tests.rs b/cranelift/codegen/src/isa/aarch64/inst/emit_tests.rs index e2f08abb21..f8b446de31 100644 --- a/cranelift/codegen/src/isa/aarch64/inst/emit_tests.rs +++ b/cranelift/codegen/src/isa/aarch64/inst/emit_tests.rs @@ -4551,7 +4551,7 @@ fn test_aarch64_binemit() { insns.push(( Inst::AtomicRMW { ty: I16, - op: AtomicRMWOp::Xor, + op: inst_common::AtomicRmwOp::Xor, srcloc: None, }, "BF3B03D53B7F5F487C031ACA3C7F1848B8FFFFB5BF3B03D5", @@ -4561,7 +4561,7 @@ fn test_aarch64_binemit() { insns.push(( Inst::AtomicRMW { ty: I32, - op: AtomicRMWOp::Xchg, + op: inst_common::AtomicRmwOp::Xchg, srcloc: None, }, "BF3B03D53B7F5F88FC031AAA3C7F1888B8FFFFB5BF3B03D5", diff --git a/cranelift/codegen/src/isa/aarch64/inst/mod.rs b/cranelift/codegen/src/isa/aarch64/inst/mod.rs index b90dccd41a..b527b7dc19 100644 --- a/cranelift/codegen/src/isa/aarch64/inst/mod.rs +++ b/cranelift/codegen/src/isa/aarch64/inst/mod.rs @@ -649,7 +649,7 @@ pub enum Inst { /// x28 (wr) scratch reg; value afterwards has no meaning AtomicRMW { ty: Type, // I8, I16, I32 or I64 - op: AtomicRMWOp, + op: inst_common::AtomicRmwOp, srcloc: Option, }, diff --git a/cranelift/codegen/src/isa/aarch64/lower.rs b/cranelift/codegen/src/isa/aarch64/lower.rs index d399b90ed0..55b675a714 100644 --- a/cranelift/codegen/src/isa/aarch64/lower.rs +++ b/cranelift/codegen/src/isa/aarch64/lower.rs @@ -7,10 +7,11 @@ //! //! - Floating-point immediates (FIMM instruction). 
+use crate::ir; use crate::ir::condcodes::{FloatCC, IntCC}; use crate::ir::types::*; use crate::ir::Inst as IRInst; -use crate::ir::{AtomicRmwOp, InstructionData, Opcode, TrapCode, Type}; +use crate::ir::{InstructionData, Opcode, TrapCode, Type}; use crate::machinst::lower::*; use crate::machinst::*; use crate::CodegenResult; @@ -1067,7 +1068,7 @@ pub(crate) fn inst_trapcode(data: &InstructionData) -> Option { } } -pub(crate) fn inst_atomic_rmw_op(data: &InstructionData) -> Option { +pub(crate) fn inst_atomic_rmw_op(data: &InstructionData) -> Option { match data { &InstructionData::AtomicRmw { op, .. } => Some(op), _ => None, diff --git a/cranelift/codegen/src/isa/aarch64/lower_inst.rs b/cranelift/codegen/src/isa/aarch64/lower_inst.rs index b2915d024e..b52f01364d 100644 --- a/cranelift/codegen/src/isa/aarch64/lower_inst.rs +++ b/cranelift/codegen/src/isa/aarch64/lower_inst.rs @@ -21,7 +21,8 @@ use smallvec::SmallVec; use super::lower::*; -fn is_single_word_int_ty(ty: Type) -> bool { +/// This is target-word-size dependent. And it excludes booleans and reftypes. 
+fn is_valid_atomic_transaction_ty(ty: Type) -> bool { match ty { I8 | I16 | I32 | I64 => true, _ => false, @@ -1228,7 +1229,7 @@ pub(crate) fn lower_insn_to_regs>( let mut r_addr = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None); let mut r_arg2 = put_input_in_reg(ctx, inputs[1], NarrowValueMode::None); let ty_access = ty.unwrap(); - assert!(is_single_word_int_ty(ty_access)); + assert!(is_valid_atomic_transaction_ty(ty_access)); let memflags = ctx.memflags(insn).expect("memory flags"); let srcloc = if !memflags.notrap() { Some(ctx.srcloc(insn)) @@ -1244,7 +1245,7 @@ pub(crate) fn lower_insn_to_regs>( ctx.emit(Inst::gen_move(Writable::from_reg(xreg(25)), r_addr, I64)); ctx.emit(Inst::gen_move(Writable::from_reg(xreg(26)), r_arg2, I64)); // Now the AtomicRMW insn itself - let op = AtomicRMWOp::from(inst_atomic_rmw_op(ctx.data(insn)).unwrap()); + let op = inst_common::AtomicRmwOp::from(inst_atomic_rmw_op(ctx.data(insn)).unwrap()); ctx.emit(Inst::AtomicRMW { ty: ty_access, op, @@ -1264,7 +1265,7 @@ pub(crate) fn lower_insn_to_regs>( let mut r_expected = put_input_in_reg(ctx, inputs[1], NarrowValueMode::None); let mut r_replacement = put_input_in_reg(ctx, inputs[2], NarrowValueMode::None); let ty_access = ty.unwrap(); - assert!(is_single_word_int_ty(ty_access)); + assert!(is_valid_atomic_transaction_ty(ty_access)); let memflags = ctx.memflags(insn).expect("memory flags"); let srcloc = if !memflags.notrap() { Some(ctx.srcloc(insn)) @@ -1302,7 +1303,7 @@ pub(crate) fn lower_insn_to_regs>( let r_data = get_output_reg(ctx, outputs[0]); let r_addr = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None); let ty_access = ty.unwrap(); - assert!(is_single_word_int_ty(ty_access)); + assert!(is_valid_atomic_transaction_ty(ty_access)); let memflags = ctx.memflags(insn).expect("memory flags"); let srcloc = if !memflags.notrap() { Some(ctx.srcloc(insn)) @@ -1321,7 +1322,7 @@ pub(crate) fn lower_insn_to_regs>( let r_data = put_input_in_reg(ctx, inputs[0], 
NarrowValueMode::None); let r_addr = put_input_in_reg(ctx, inputs[1], NarrowValueMode::None); let ty_access = ctx.input_ty(insn, 0); - assert!(is_single_word_int_ty(ty_access)); + assert!(is_valid_atomic_transaction_ty(ty_access)); let memflags = ctx.memflags(insn).expect("memory flags"); let srcloc = if !memflags.notrap() { Some(ctx.srcloc(insn)) diff --git a/cranelift/codegen/src/isa/x64/inst/args.rs b/cranelift/codegen/src/isa/x64/inst/args.rs index 343f3322d0..8690c57a4c 100644 --- a/cranelift/codegen/src/isa/x64/inst/args.rs +++ b/cranelift/codegen/src/isa/x64/inst/args.rs @@ -1010,3 +1010,14 @@ impl OperandSize { } } } + +/// An x64 memory fence kind. +#[derive(Clone)] +pub enum FenceKind { + /// `mfence` instruction ("Memory Fence") + MFence, + /// `lfence` instruction ("Load Fence") + LFence, + /// `sfence` instruction ("Store Fence") + SFence, +} diff --git a/cranelift/codegen/src/isa/x64/inst/emit.rs b/cranelift/codegen/src/isa/x64/inst/emit.rs index 9bae562c5c..b54de499c9 100644 --- a/cranelift/codegen/src/isa/x64/inst/emit.rs +++ b/cranelift/codegen/src/isa/x64/inst/emit.rs @@ -3,7 +3,7 @@ use crate::ir::immediates::{Ieee32, Ieee64}; use crate::ir::TrapCode; use crate::isa::x64::inst::args::*; use crate::isa::x64::inst::*; -use crate::machinst::{MachBuffer, MachInstEmit, MachLabel}; +use crate::machinst::{inst_common, MachBuffer, MachInstEmit, MachLabel}; use core::convert::TryInto; use log::debug; use regalloc::{Reg, RegClass, Writable}; @@ -118,25 +118,38 @@ impl RexFlags { } } -/// For specifying the legacy prefixes (or `None` if no prefix required) to -/// be used at the start an instruction. A given prefix may be required for -/// various operations, including instructions that operate on GPR, SSE, and Vex -/// registers. -enum LegacyPrefix { +/// We may need to include one or more legacy prefix bytes before the REX prefix. This enum +/// covers only the small set of possibilities that we actually need. 
+enum LegacyPrefixes { + /// No prefix bytes None, + /// Operand Size Override -- here, denoting "16-bit operation" _66, + /// The Lock prefix + _F0, + /// Operand size override and Lock + _66F0, + /// REPNE, but no specific meaning here -- is just an opcode extension _F2, + /// REP/REPE, but no specific meaning here -- is just an opcode extension _F3, } -impl LegacyPrefix { +impl LegacyPrefixes { #[inline(always)] fn emit(&self, sink: &mut MachBuffer) { match self { - LegacyPrefix::_66 => sink.put1(0x66), - LegacyPrefix::_F2 => sink.put1(0xF2), - LegacyPrefix::_F3 => sink.put1(0xF3), - LegacyPrefix::None => (), + LegacyPrefixes::_66 => sink.put1(0x66), + LegacyPrefixes::_F0 => sink.put1(0xF0), + LegacyPrefixes::_66F0 => { + // I don't think the order matters, but in any case, this is the same order that + // the GNU assembler uses. + sink.put1(0x66); + sink.put1(0xF0); + } + LegacyPrefixes::_F2 => sink.put1(0xF2), + LegacyPrefixes::_F3 => sink.put1(0xF3), + LegacyPrefixes::None => (), } } } @@ -145,15 +158,16 @@ impl LegacyPrefix { /// /// For an instruction that has as operands a reg encoding `enc_g` and a memory address `mem_e`, /// create and emit: -/// - first the REX prefix, +/// - first the legacy prefixes, if any +/// - then the REX prefix, if needed /// - then caller-supplied opcode byte(s) (`opcodes` and `num_opcodes`), /// - then the MOD/RM byte, /// - then optionally, a SIB byte, /// - and finally optionally an immediate that will be derived from the `mem_e` operand. /// /// For most instructions up to and including SSE4.2, that will be the whole instruction: this is -/// what we call "standard" instructions, and abbreviate "std" in the name here. VEX instructions -/// will require their own emitter functions. +/// what we call "standard" instructions, and abbreviate "std" in the name here. VEX-prefixed +/// instructions will require their own emitter functions. 
/// /// This will also work for 32-bits x86 instructions, assuming no REX prefix is provided. /// @@ -168,7 +182,7 @@ impl LegacyPrefix { /// indicate a 64-bit operation. fn emit_std_enc_mem( sink: &mut MachBuffer, - prefix: LegacyPrefix, + prefixes: LegacyPrefixes, opcodes: u32, mut num_opcodes: usize, enc_g: u8, @@ -179,7 +193,7 @@ fn emit_std_enc_mem( // 64-bit integer registers, because they are part of an address // expression. But `enc_g` can be derived from a register of any class. - prefix.emit(sink); + prefixes.emit(sink); match mem_e { Amode::ImmReg { simm32, base } => { @@ -304,7 +318,7 @@ fn emit_std_enc_mem( /// operand is a register rather than memory. Hence it is much simpler. fn emit_std_enc_enc( sink: &mut MachBuffer, - prefix: LegacyPrefix, + prefixes: LegacyPrefixes, opcodes: u32, mut num_opcodes: usize, enc_g: u8, @@ -316,8 +330,8 @@ fn emit_std_enc_enc( // integer-to-FP conversion insn, one might be RegClass::I64 and the other // RegClass::V128. - // The operand-size override. - prefix.emit(sink); + // The legacy prefixes. + prefixes.emit(sink); // The rex byte. 
rex.emit_two_op(sink, enc_g, enc_e); @@ -338,7 +352,7 @@ fn emit_std_enc_enc( fn emit_std_reg_mem( sink: &mut MachBuffer, - prefix: LegacyPrefix, + prefixes: LegacyPrefixes, opcodes: u32, num_opcodes: usize, reg_g: Reg, @@ -346,12 +360,12 @@ fn emit_std_reg_mem( rex: RexFlags, ) { let enc_g = reg_enc(reg_g); - emit_std_enc_mem(sink, prefix, opcodes, num_opcodes, enc_g, mem_e, rex); + emit_std_enc_mem(sink, prefixes, opcodes, num_opcodes, enc_g, mem_e, rex); } fn emit_std_reg_reg( sink: &mut MachBuffer, - prefix: LegacyPrefix, + prefixes: LegacyPrefixes, opcodes: u32, num_opcodes: usize, reg_g: Reg, @@ -360,7 +374,7 @@ fn emit_std_reg_reg( ) { let enc_g = reg_enc(reg_g); let enc_e = reg_enc(reg_e); - emit_std_enc_enc(sink, prefix, opcodes, num_opcodes, enc_g, enc_e, rex); + emit_std_enc_enc(sink, prefixes, opcodes, num_opcodes, enc_g, enc_e, rex); } /// Write a suitable number of bits from an imm64 to the sink. @@ -481,7 +495,7 @@ pub(crate) fn emit( RegMemImm::Reg { reg: reg_e } => { emit_std_reg_reg( sink, - LegacyPrefix::None, + LegacyPrefixes::None, 0x0FAF, 2, reg_g.to_reg(), @@ -493,7 +507,7 @@ pub(crate) fn emit( RegMemImm::Mem { addr } => { emit_std_reg_mem( sink, - LegacyPrefix::None, + LegacyPrefixes::None, 0x0FAF, 2, reg_g.to_reg(), @@ -508,7 +522,7 @@ pub(crate) fn emit( // Yes, really, reg_g twice. emit_std_reg_reg( sink, - LegacyPrefix::None, + LegacyPrefixes::None, opcode, 1, reg_g.to_reg(), @@ -535,7 +549,7 @@ pub(crate) fn emit( // code easily. emit_std_reg_reg( sink, - LegacyPrefix::None, + LegacyPrefixes::None, opcode_r, 1, *reg_e, @@ -550,7 +564,7 @@ pub(crate) fn emit( // Here we revert to the "normal" G-E ordering. 
emit_std_reg_mem( sink, - LegacyPrefix::None, + LegacyPrefixes::None, opcode_m, 1, reg_g.to_reg(), @@ -566,7 +580,7 @@ pub(crate) fn emit( let enc_g = int_reg_enc(reg_g.to_reg()); emit_std_enc_enc( sink, - LegacyPrefix::None, + LegacyPrefixes::None, opcode, 1, subopcode_i, @@ -581,9 +595,9 @@ pub(crate) fn emit( Inst::UnaryRmR { size, op, src, dst } => { let (prefix, rex_flags) = match size { - 2 => (LegacyPrefix::_66, RexFlags::clear_w()), - 4 => (LegacyPrefix::None, RexFlags::clear_w()), - 8 => (LegacyPrefix::None, RexFlags::set_w()), + 2 => (LegacyPrefixes::_66, RexFlags::clear_w()), + 4 => (LegacyPrefixes::None, RexFlags::clear_w()), + 8 => (LegacyPrefixes::None, RexFlags::set_w()), _ => unreachable!(), }; @@ -621,9 +635,9 @@ pub(crate) fn emit( loc, } => { let (prefix, rex_flags) = match size { - 2 => (LegacyPrefix::_66, RexFlags::clear_w()), - 4 => (LegacyPrefix::None, RexFlags::clear_w()), - 8 => (LegacyPrefix::None, RexFlags::set_w()), + 2 => (LegacyPrefixes::_66, RexFlags::clear_w()), + 4 => (LegacyPrefixes::None, RexFlags::clear_w()), + 8 => (LegacyPrefixes::None, RexFlags::set_w()), _ => unreachable!(), }; @@ -649,9 +663,9 @@ pub(crate) fn emit( Inst::MulHi { size, signed, rhs } => { let (prefix, rex_flags) = match size { - 2 => (LegacyPrefix::_66, RexFlags::clear_w()), - 4 => (LegacyPrefix::None, RexFlags::clear_w()), - 8 => (LegacyPrefix::None, RexFlags::set_w()), + 2 => (LegacyPrefixes::_66, RexFlags::clear_w()), + 4 => (LegacyPrefixes::None, RexFlags::clear_w()), + 8 => (LegacyPrefixes::None, RexFlags::set_w()), _ => unreachable!(), }; @@ -826,7 +840,7 @@ pub(crate) fn emit( } else { RexFlags::clear_w() }; - emit_std_reg_reg(sink, LegacyPrefix::None, 0x89, 1, *src, dst.to_reg(), rex); + emit_std_reg_reg(sink, LegacyPrefixes::None, 0x89, 1, *src, dst.to_reg(), rex); } Inst::MovZX_RM_R { @@ -880,7 +894,7 @@ pub(crate) fn emit( } emit_std_reg_reg( sink, - LegacyPrefix::None, + LegacyPrefixes::None, opcodes, num_opcodes, dst.to_reg(), @@ -899,7 +913,7 @@ 
pub(crate) fn emit( emit_std_reg_mem( sink, - LegacyPrefix::None, + LegacyPrefixes::None, opcodes, num_opcodes, dst.to_reg(), @@ -920,7 +934,7 @@ pub(crate) fn emit( emit_std_reg_mem( sink, - LegacyPrefix::None, + LegacyPrefixes::None, 0x8B, 1, dst.to_reg(), @@ -931,7 +945,7 @@ pub(crate) fn emit( Inst::LoadEffectiveAddress { addr, dst } => emit_std_reg_mem( sink, - LegacyPrefix::None, + LegacyPrefixes::None, 0x8D, 1, dst.to_reg(), @@ -982,7 +996,7 @@ pub(crate) fn emit( } emit_std_reg_reg( sink, - LegacyPrefix::None, + LegacyPrefixes::None, opcodes, num_opcodes, dst.to_reg(), @@ -1001,7 +1015,7 @@ pub(crate) fn emit( emit_std_reg_mem( sink, - LegacyPrefix::None, + LegacyPrefixes::None, opcodes, num_opcodes, dst.to_reg(), @@ -1038,14 +1052,14 @@ pub(crate) fn emit( }; // MOV r8, r/m8 is (REX.W==0) 88 /r - emit_std_reg_mem(sink, LegacyPrefix::None, 0x88, 1, *src, dst, rex) + emit_std_reg_mem(sink, LegacyPrefixes::None, 0x88, 1, *src, dst, rex) } 2 => { // MOV r16, r/m16 is 66 (REX.W==0) 89 /r emit_std_reg_mem( sink, - LegacyPrefix::_66, + LegacyPrefixes::_66, 0x89, 1, *src, @@ -1058,7 +1072,7 @@ pub(crate) fn emit( // MOV r32, r/m32 is (REX.W==0) 89 /r emit_std_reg_mem( sink, - LegacyPrefix::None, + LegacyPrefixes::None, 0x89, 1, *src, @@ -1071,7 +1085,7 @@ pub(crate) fn emit( // MOV r64, r/m64 is (REX.W==1) 89 /r emit_std_reg_mem( sink, - LegacyPrefix::None, + LegacyPrefixes::None, 0x89, 1, *src, @@ -1109,7 +1123,7 @@ pub(crate) fn emit( None => { // SHL/SHR/SAR %cl, reg32 is (REX.W==0) D3 /subopcode // SHL/SHR/SAR %cl, reg64 is (REX.W==1) D3 /subopcode - emit_std_enc_enc(sink, LegacyPrefix::None, 0xD3, 1, subopcode, enc_dst, rex); + emit_std_enc_enc(sink, LegacyPrefixes::None, 0xD3, 1, subopcode, enc_dst, rex); } Some(num_bits) => { @@ -1117,7 +1131,7 @@ pub(crate) fn emit( // SHL/SHR/SAR $ib, reg64 is (REX.W==1) C1 /subopcode ib // When the shift amount is 1, there's an even shorter encoding, but we don't // bother with that nicety here. 
- emit_std_enc_enc(sink, LegacyPrefix::None, 0xC1, 1, subopcode, enc_dst, rex); + emit_std_enc_enc(sink, LegacyPrefixes::None, 0xC1, 1, subopcode, enc_dst, rex); sink.put1(*num_bits); } } @@ -1125,7 +1139,7 @@ pub(crate) fn emit( Inst::XmmRmiReg { opcode, src, dst } => { let rex = RexFlags::clear_w(); - let prefix = LegacyPrefix::_66; + let prefix = LegacyPrefixes::_66; if let RegMemImm::Imm { simm32 } = src { let (opcode_bytes, reg_digit) = match opcode { SseOpcode::Psllw => (0x0F71, 6), @@ -1175,9 +1189,9 @@ pub(crate) fn emit( src: src_e, dst: reg_g, } => { - let mut prefix = LegacyPrefix::None; + let mut prefix = LegacyPrefixes::None; if *size == 2 { - prefix = LegacyPrefix::_66; + prefix = LegacyPrefixes::_66; } let mut rex = match size { @@ -1245,7 +1259,7 @@ pub(crate) fn emit( rex_flags.always_emit(); emit_std_enc_enc( sink, - LegacyPrefix::None, + LegacyPrefixes::None, opcode, 2, 0, @@ -1261,9 +1275,9 @@ pub(crate) fn emit( dst: reg_g, } => { let (prefix, rex_flags) = match size { - 2 => (LegacyPrefix::_66, RexFlags::clear_w()), - 4 => (LegacyPrefix::None, RexFlags::clear_w()), - 8 => (LegacyPrefix::None, RexFlags::set_w()), + 2 => (LegacyPrefixes::_66, RexFlags::clear_w()), + 4 => (LegacyPrefixes::None, RexFlags::clear_w()), + 8 => (LegacyPrefixes::None, RexFlags::set_w()), _ => unreachable!("invalid size spec for cmove"), }; let opcode = 0x0F40 + cc.get_enc() as u32; @@ -1315,7 +1329,7 @@ pub(crate) fn emit( let addr = &addr.finalize(state); emit_std_enc_mem( sink, - LegacyPrefix::None, + LegacyPrefixes::None, 0xFF, 1, 6, /*subopcode*/ @@ -1371,7 +1385,7 @@ pub(crate) fn emit( let reg_enc = int_reg_enc(*reg); emit_std_enc_enc( sink, - LegacyPrefix::None, + LegacyPrefixes::None, 0xFF, 1, 2, /*subopcode*/ @@ -1384,7 +1398,7 @@ pub(crate) fn emit( let addr = &addr.finalize(state); emit_std_enc_mem( sink, - LegacyPrefix::None, + LegacyPrefixes::None, 0xFF, 1, 2, /*subopcode*/ @@ -1461,7 +1475,7 @@ pub(crate) fn emit( let reg_enc = int_reg_enc(*reg); 
emit_std_enc_enc( sink, - LegacyPrefix::None, + LegacyPrefixes::None, 0xFF, 1, 4, /*subopcode*/ @@ -1474,7 +1488,7 @@ pub(crate) fn emit( let addr = &addr.finalize(state); emit_std_enc_mem( sink, - LegacyPrefix::None, + LegacyPrefixes::None, 0xFF, 1, 4, /*subopcode*/ @@ -1596,20 +1610,20 @@ pub(crate) fn emit( let rex = RexFlags::clear_w(); let (prefix, opcode) = match op { - SseOpcode::Cvtss2sd => (LegacyPrefix::_F3, 0x0F5A), - SseOpcode::Cvtsd2ss => (LegacyPrefix::_F2, 0x0F5A), - SseOpcode::Movaps => (LegacyPrefix::None, 0x0F28), - SseOpcode::Movapd => (LegacyPrefix::_66, 0x0F28), - SseOpcode::Movdqa => (LegacyPrefix::_66, 0x0F6F), - SseOpcode::Movdqu => (LegacyPrefix::_F3, 0x0F6F), - SseOpcode::Movsd => (LegacyPrefix::_F2, 0x0F10), - SseOpcode::Movss => (LegacyPrefix::_F3, 0x0F10), - SseOpcode::Movups => (LegacyPrefix::None, 0x0F10), - SseOpcode::Movupd => (LegacyPrefix::_66, 0x0F10), - SseOpcode::Sqrtps => (LegacyPrefix::None, 0x0F51), - SseOpcode::Sqrtpd => (LegacyPrefix::_66, 0x0F51), - SseOpcode::Sqrtss => (LegacyPrefix::_F3, 0x0F51), - SseOpcode::Sqrtsd => (LegacyPrefix::_F2, 0x0F51), + SseOpcode::Cvtss2sd => (LegacyPrefixes::_F3, 0x0F5A), + SseOpcode::Cvtsd2ss => (LegacyPrefixes::_F2, 0x0F5A), + SseOpcode::Movaps => (LegacyPrefixes::None, 0x0F28), + SseOpcode::Movapd => (LegacyPrefixes::_66, 0x0F28), + SseOpcode::Movdqa => (LegacyPrefixes::_66, 0x0F6F), + SseOpcode::Movdqu => (LegacyPrefixes::_F3, 0x0F6F), + SseOpcode::Movsd => (LegacyPrefixes::_F2, 0x0F10), + SseOpcode::Movss => (LegacyPrefixes::_F3, 0x0F10), + SseOpcode::Movups => (LegacyPrefixes::None, 0x0F10), + SseOpcode::Movupd => (LegacyPrefixes::_66, 0x0F10), + SseOpcode::Sqrtps => (LegacyPrefixes::None, 0x0F51), + SseOpcode::Sqrtpd => (LegacyPrefixes::_66, 0x0F51), + SseOpcode::Sqrtss => (LegacyPrefixes::_F3, 0x0F51), + SseOpcode::Sqrtsd => (LegacyPrefixes::_F2, 0x0F51), _ => unimplemented!("Opcode {:?} not implemented", op), }; @@ -1635,49 +1649,49 @@ pub(crate) fn emit( } => { let rex = 
RexFlags::clear_w(); let (prefix, opcode, length) = match op { - SseOpcode::Addps => (LegacyPrefix::None, 0x0F58, 2), - SseOpcode::Addpd => (LegacyPrefix::_66, 0x0F58, 2), - SseOpcode::Addss => (LegacyPrefix::_F3, 0x0F58, 2), - SseOpcode::Addsd => (LegacyPrefix::_F2, 0x0F58, 2), - SseOpcode::Andpd => (LegacyPrefix::_66, 0x0F54, 2), - SseOpcode::Andps => (LegacyPrefix::None, 0x0F54, 2), - SseOpcode::Andnps => (LegacyPrefix::None, 0x0F55, 2), - SseOpcode::Andnpd => (LegacyPrefix::_66, 0x0F55, 2), - SseOpcode::Divps => (LegacyPrefix::None, 0x0F5E, 2), - SseOpcode::Divpd => (LegacyPrefix::_66, 0x0F5E, 2), - SseOpcode::Divss => (LegacyPrefix::_F3, 0x0F5E, 2), - SseOpcode::Divsd => (LegacyPrefix::_F2, 0x0F5E, 2), - SseOpcode::Minps => (LegacyPrefix::None, 0x0F5D, 2), - SseOpcode::Minpd => (LegacyPrefix::_66, 0x0F5D, 2), - SseOpcode::Minss => (LegacyPrefix::_F3, 0x0F5D, 2), - SseOpcode::Minsd => (LegacyPrefix::_F2, 0x0F5D, 2), - SseOpcode::Maxps => (LegacyPrefix::None, 0x0F5F, 2), - SseOpcode::Maxpd => (LegacyPrefix::_66, 0x0F5F, 2), - SseOpcode::Maxss => (LegacyPrefix::_F3, 0x0F5F, 2), - SseOpcode::Maxsd => (LegacyPrefix::_F2, 0x0F5F, 2), - SseOpcode::Mulps => (LegacyPrefix::None, 0x0F59, 2), - SseOpcode::Mulpd => (LegacyPrefix::_66, 0x0F59, 2), - SseOpcode::Mulss => (LegacyPrefix::_F3, 0x0F59, 2), - SseOpcode::Mulsd => (LegacyPrefix::_F2, 0x0F59, 2), - SseOpcode::Orpd => (LegacyPrefix::_66, 0x0F56, 2), - SseOpcode::Orps => (LegacyPrefix::None, 0x0F56, 2), - SseOpcode::Paddb => (LegacyPrefix::_66, 0x0FFC, 2), - SseOpcode::Paddd => (LegacyPrefix::_66, 0x0FFE, 2), - SseOpcode::Paddq => (LegacyPrefix::_66, 0x0FD4, 2), - SseOpcode::Paddw => (LegacyPrefix::_66, 0x0FFD, 2), - SseOpcode::Pmulld => (LegacyPrefix::_66, 0x0F3840, 3), - SseOpcode::Pmullw => (LegacyPrefix::_66, 0x0FD5, 2), - SseOpcode::Pmuludq => (LegacyPrefix::_66, 0x0FF4, 2), - SseOpcode::Psubb => (LegacyPrefix::_66, 0x0FF8, 2), - SseOpcode::Psubd => (LegacyPrefix::_66, 0x0FFA, 2), - SseOpcode::Psubq => 
(LegacyPrefix::_66, 0x0FFB, 2), - SseOpcode::Psubw => (LegacyPrefix::_66, 0x0FF9, 2), - SseOpcode::Subps => (LegacyPrefix::None, 0x0F5C, 2), - SseOpcode::Subpd => (LegacyPrefix::_66, 0x0F5C, 2), - SseOpcode::Subss => (LegacyPrefix::_F3, 0x0F5C, 2), - SseOpcode::Subsd => (LegacyPrefix::_F2, 0x0F5C, 2), - SseOpcode::Xorps => (LegacyPrefix::None, 0x0F57, 2), - SseOpcode::Xorpd => (LegacyPrefix::_66, 0x0F57, 2), + SseOpcode::Addps => (LegacyPrefixes::None, 0x0F58, 2), + SseOpcode::Addpd => (LegacyPrefixes::_66, 0x0F58, 2), + SseOpcode::Addss => (LegacyPrefixes::_F3, 0x0F58, 2), + SseOpcode::Addsd => (LegacyPrefixes::_F2, 0x0F58, 2), + SseOpcode::Andpd => (LegacyPrefixes::_66, 0x0F54, 2), + SseOpcode::Andps => (LegacyPrefixes::None, 0x0F54, 2), + SseOpcode::Andnps => (LegacyPrefixes::None, 0x0F55, 2), + SseOpcode::Andnpd => (LegacyPrefixes::_66, 0x0F55, 2), + SseOpcode::Divps => (LegacyPrefixes::None, 0x0F5E, 2), + SseOpcode::Divpd => (LegacyPrefixes::_66, 0x0F5E, 2), + SseOpcode::Divss => (LegacyPrefixes::_F3, 0x0F5E, 2), + SseOpcode::Divsd => (LegacyPrefixes::_F2, 0x0F5E, 2), + SseOpcode::Minps => (LegacyPrefixes::None, 0x0F5D, 2), + SseOpcode::Minpd => (LegacyPrefixes::_66, 0x0F5D, 2), + SseOpcode::Minss => (LegacyPrefixes::_F3, 0x0F5D, 2), + SseOpcode::Minsd => (LegacyPrefixes::_F2, 0x0F5D, 2), + SseOpcode::Maxps => (LegacyPrefixes::None, 0x0F5F, 2), + SseOpcode::Maxpd => (LegacyPrefixes::_66, 0x0F5F, 2), + SseOpcode::Maxss => (LegacyPrefixes::_F3, 0x0F5F, 2), + SseOpcode::Maxsd => (LegacyPrefixes::_F2, 0x0F5F, 2), + SseOpcode::Mulps => (LegacyPrefixes::None, 0x0F59, 2), + SseOpcode::Mulpd => (LegacyPrefixes::_66, 0x0F59, 2), + SseOpcode::Mulss => (LegacyPrefixes::_F3, 0x0F59, 2), + SseOpcode::Mulsd => (LegacyPrefixes::_F2, 0x0F59, 2), + SseOpcode::Orpd => (LegacyPrefixes::_66, 0x0F56, 2), + SseOpcode::Orps => (LegacyPrefixes::None, 0x0F56, 2), + SseOpcode::Paddb => (LegacyPrefixes::_66, 0x0FFC, 2), + SseOpcode::Paddd => (LegacyPrefixes::_66, 0x0FFE, 2), + 
SseOpcode::Paddq => (LegacyPrefixes::_66, 0x0FD4, 2), + SseOpcode::Paddw => (LegacyPrefixes::_66, 0x0FFD, 2), + SseOpcode::Pmulld => (LegacyPrefixes::_66, 0x0F3840, 3), + SseOpcode::Pmullw => (LegacyPrefixes::_66, 0x0FD5, 2), + SseOpcode::Pmuludq => (LegacyPrefixes::_66, 0x0FF4, 2), + SseOpcode::Psubb => (LegacyPrefixes::_66, 0x0FF8, 2), + SseOpcode::Psubd => (LegacyPrefixes::_66, 0x0FFA, 2), + SseOpcode::Psubq => (LegacyPrefixes::_66, 0x0FFB, 2), + SseOpcode::Psubw => (LegacyPrefixes::_66, 0x0FF9, 2), + SseOpcode::Subps => (LegacyPrefixes::None, 0x0F5C, 2), + SseOpcode::Subpd => (LegacyPrefixes::_66, 0x0F5C, 2), + SseOpcode::Subss => (LegacyPrefixes::_F3, 0x0F5C, 2), + SseOpcode::Subsd => (LegacyPrefixes::_F2, 0x0F5C, 2), + SseOpcode::Xorps => (LegacyPrefixes::None, 0x0F57, 2), + SseOpcode::Xorpd => (LegacyPrefixes::_66, 0x0F57, 2), _ => unimplemented!("Opcode {:?} not implemented", op), }; @@ -1780,10 +1794,10 @@ pub(crate) fn emit( Inst::XmmRmRImm { op, src, dst, imm } => { let prefix = match op { - SseOpcode::Cmpps => LegacyPrefix::None, - SseOpcode::Cmppd => LegacyPrefix::_66, - SseOpcode::Cmpss => LegacyPrefix::_F3, - SseOpcode::Cmpsd => LegacyPrefix::_F2, + SseOpcode::Cmpps => LegacyPrefixes::None, + SseOpcode::Cmppd => LegacyPrefixes::_66, + SseOpcode::Cmpss => LegacyPrefixes::_F3, + SseOpcode::Cmpsd => LegacyPrefixes::_F2, _ => unimplemented!("Opcode {:?} not implemented", op), }; let opcode = 0x0FC2; @@ -1833,14 +1847,14 @@ pub(crate) fn emit( srcloc, } => { let (prefix, opcode) = match op { - SseOpcode::Movaps => (LegacyPrefix::None, 0x0F29), - SseOpcode::Movapd => (LegacyPrefix::_66, 0x0F29), - SseOpcode::Movdqa => (LegacyPrefix::_66, 0x0F7F), - SseOpcode::Movdqu => (LegacyPrefix::_F3, 0x0F7F), - SseOpcode::Movss => (LegacyPrefix::_F3, 0x0F11), - SseOpcode::Movsd => (LegacyPrefix::_F2, 0x0F11), - SseOpcode::Movups => (LegacyPrefix::None, 0x0F11), - SseOpcode::Movupd => (LegacyPrefix::_66, 0x0F11), + SseOpcode::Movaps => (LegacyPrefixes::None, 0x0F29), + 
SseOpcode::Movapd => (LegacyPrefixes::_66, 0x0F29), + SseOpcode::Movdqa => (LegacyPrefixes::_66, 0x0F7F), + SseOpcode::Movdqu => (LegacyPrefixes::_F3, 0x0F7F), + SseOpcode::Movss => (LegacyPrefixes::_F3, 0x0F11), + SseOpcode::Movsd => (LegacyPrefixes::_F2, 0x0F11), + SseOpcode::Movups => (LegacyPrefixes::None, 0x0F11), + SseOpcode::Movupd => (LegacyPrefixes::_66, 0x0F11), _ => unimplemented!("Opcode {:?} not implemented", op), }; let dst = &dst.finalize(state); @@ -1860,9 +1874,9 @@ pub(crate) fn emit( let (prefix, opcode, dst_first) = match op { // Movd and movq use the same opcode; the presence of the REX prefix (set below) // actually determines which is used. - SseOpcode::Movd | SseOpcode::Movq => (LegacyPrefix::_66, 0x0F7E, false), - SseOpcode::Cvttss2si => (LegacyPrefix::_F3, 0x0F2C, true), - SseOpcode::Cvttsd2si => (LegacyPrefix::_F2, 0x0F2C, true), + SseOpcode::Movd | SseOpcode::Movq => (LegacyPrefixes::_66, 0x0F7E, false), + SseOpcode::Cvttss2si => (LegacyPrefixes::_F3, 0x0F2C, true), + SseOpcode::Cvttsd2si => (LegacyPrefixes::_F2, 0x0F2C, true), _ => panic!("unexpected opcode {:?}", op), }; let rex = match dst_size { @@ -1888,9 +1902,9 @@ pub(crate) fn emit( let (prefix, opcode) = match op { // Movd and movq use the same opcode; the presence of the REX prefix (set below) // actually determines which is used. 
- SseOpcode::Movd | SseOpcode::Movq => (LegacyPrefix::_66, 0x0F6E), - SseOpcode::Cvtsi2ss => (LegacyPrefix::_F3, 0x0F2A), - SseOpcode::Cvtsi2sd => (LegacyPrefix::_F2, 0x0F2A), + SseOpcode::Movd | SseOpcode::Movq => (LegacyPrefixes::_66, 0x0F6E), + SseOpcode::Cvtsi2ss => (LegacyPrefixes::_F3, 0x0F2A), + SseOpcode::Cvtsi2sd => (LegacyPrefixes::_F2, 0x0F2A), _ => panic!("unexpected opcode {:?}", op), }; let rex = match *src_size { @@ -1911,8 +1925,8 @@ pub(crate) fn emit( Inst::XMM_Cmp_RM_R { op, src, dst } => { let rex = RexFlags::clear_w(); let (prefix, opcode) = match op { - SseOpcode::Ucomisd => (LegacyPrefix::_66, 0x0F2E), - SseOpcode::Ucomiss => (LegacyPrefix::None, 0x0F2E), + SseOpcode::Ucomisd => (LegacyPrefixes::_66, 0x0F2E), + SseOpcode::Ucomiss => (LegacyPrefixes::None, 0x0F2E), _ => unimplemented!("Emit xmm cmp rm r"), }; @@ -2431,6 +2445,113 @@ pub(crate) fn emit( } } + Inst::LockCmpxchg { + ty, + src, + dst, + srcloc, + } => { + if let Some(srcloc) = srcloc { + sink.add_trap(*srcloc, TrapCode::HeapOutOfBounds); + } + // lock cmpxchg{b,w,l,q} %src, (dst) + // Note that 0xF0 is the Lock prefix. 
+ let (prefix, rex, opcodes) = match *ty { + types::I8 => { + let mut rex_flags = RexFlags::clear_w(); + let enc_src = int_reg_enc(*src); + if enc_src >= 4 && enc_src <= 7 { + rex_flags.always_emit(); + }; + (LegacyPrefixes::_F0, rex_flags, 0x0FB0) + } + types::I16 => (LegacyPrefixes::_66F0, RexFlags::clear_w(), 0x0FB1), + types::I32 => (LegacyPrefixes::_F0, RexFlags::clear_w(), 0x0FB1), + types::I64 => (LegacyPrefixes::_F0, RexFlags::set_w(), 0x0FB1), + _ => unreachable!(), + }; + emit_std_reg_mem(sink, prefix, opcodes, 2, *src, &dst.finalize(state), rex); + } + + Inst::AtomicRmwSeq { ty, op, srcloc } => { + // Emit this: + // + // mov{zbq,zwq,zlq,q} (%r9), %rax // rax = old value + // again: + // movq %rax, %r11 // rax = old value, r11 = old value + // `op`q %r10, %r11 // rax = old value, r11 = new value + // lock cmpxchg{b,w,l,q} %r11, (%r9) // try to store new value + // jnz again // If this is taken, rax will have a "revised" old value + // + // Operand conventions: + // IN: %r9 (addr), %r10 (2nd arg for `op`) + // OUT: %rax (old value), %r11 (trashed), %rflags (trashed) + // + // In the case where the operation is 'xchg', the "`op`q" instruction is instead + // movq %r10, %r11 + // so that we simply write in the destination, the "2nd arg for `op`". + let rax = regs::rax(); + let r9 = regs::r9(); + let r10 = regs::r10(); + let r11 = regs::r11(); + let rax_w = Writable::from_reg(rax); + let r11_w = Writable::from_reg(r11); + let amode = Amode::imm_reg(0, r9); + let again_label = sink.get_label(); + + // mov{zbq,zwq,zlq,q} (%r9), %rax + // No need to call `add_trap` here, since the `i1` emit will do that. 
+ let i1 = Inst::load(*ty, amode.clone(), rax_w, ExtKind::ZeroExtend, *srcloc); + i1.emit(sink, flags, state); + + // again: + sink.bind_label(again_label); + + // movq %rax, %r11 + let i2 = Inst::mov_r_r(true, rax, r11_w); + i2.emit(sink, flags, state); + + // opq %r10, %r11 + let r10_rmi = RegMemImm::reg(r10); + let i3 = if *op == inst_common::AtomicRmwOp::Xchg { + Inst::mov_r_r(true, r10, r11_w) + } else { + let alu_op = match op { + inst_common::AtomicRmwOp::Add => AluRmiROpcode::Add, + inst_common::AtomicRmwOp::Sub => AluRmiROpcode::Sub, + inst_common::AtomicRmwOp::And => AluRmiROpcode::And, + inst_common::AtomicRmwOp::Or => AluRmiROpcode::Or, + inst_common::AtomicRmwOp::Xor => AluRmiROpcode::Xor, + inst_common::AtomicRmwOp::Xchg => unreachable!(), + }; + Inst::alu_rmi_r(true, alu_op, r10_rmi, r11_w) + }; + i3.emit(sink, flags, state); + + // lock cmpxchg{b,w,l,q} %r11, (%r9) + // No need to call `add_trap` here, since the `i4` emit will do that. + let i4 = Inst::LockCmpxchg { + ty: *ty, + src: r11, + dst: amode.into(), + srcloc: *srcloc, + }; + i4.emit(sink, flags, state); + + // jnz again + one_way_jmp(sink, CC::NZ, again_label); + } + + Inst::Fence { kind } => { + sink.put1(0x0F); + sink.put1(0xAE); + match kind { + FenceKind::MFence => sink.put1(0xF0), // mfence = 0F AE F0 + FenceKind::LFence => sink.put1(0xE8), // lfence = 0F AE E8 + FenceKind::SFence => sink.put1(0xF8), // sfence = 0F AE F8 + } + } + Inst::Hlt => { sink.put1(0xcc); } diff --git a/cranelift/codegen/src/isa/x64/inst/emit_tests.rs b/cranelift/codegen/src/isa/x64/inst/emit_tests.rs index e0f2ea1acd..cb1a6b855a 100644 --- a/cranelift/codegen/src/isa/x64/inst/emit_tests.rs +++ b/cranelift/codegen/src/isa/x64/inst/emit_tests.rs @@ -4,10 +4,13 @@ //! //! to see stdout: cargo test -- --nocapture //! -//! for this specific case: +//! for this specific case, as of 24 Aug 2020: //! -//! (cd cranelift/codegen && \ -//! 
RUST_BACKTRACE=1 cargo test isa::x64::inst::test_x64_insn_encoding_and_printing -- --nocapture) +//! cd to the top of your wasmtime tree, then: +//! RUST_BACKTRACE=1 cargo test --features test-programs/test_programs \ +//! --features experimental_x64 --all --exclude peepmatic --exclude lightbeam \ +//! --exclude wasmtime-lightbeam --exclude peepmatic-automata --exclude peepmatic-fuzzing \ +//! --exclude peepmatic-macro -- isa::x64::inst::emit_tests::test_x64_emit use super::*; use crate::isa::test_utils; @@ -3272,6 +3275,174 @@ fn test_x64_emit() { "cmpps $0, %xmm15, %xmm7", )); + // ======================================================== + // Pertaining to atomics. + let am1: SyntheticAmode = Amode::imm_reg_reg_shift(321, r10, rdx, 2).into(); + // `am2` doesn't contribute any 1 bits to the rex prefix, so we must use it when testing + // for retention of the apparently-redundant rex prefix in the 8-bit case. + let am2: SyntheticAmode = Amode::imm_reg_reg_shift(-12345i32 as u32, rcx, rsi, 3).into(); + + // A general 8-bit case. + insns.push(( + Inst::LockCmpxchg { + ty: types::I8, + src: rbx, + dst: am1, + srcloc: None, + }, + "F0410FB09C9241010000", + "lock cmpxchgb %bl, 321(%r10,%rdx,4)", + )); + // Check redundant rex retention in 8-bit cases. 
+ insns.push(( + Inst::LockCmpxchg { + ty: types::I8, + src: rdx, + dst: am2.clone(), + srcloc: None, + }, + "F00FB094F1C7CFFFFF", + "lock cmpxchgb %dl, -12345(%rcx,%rsi,8)", + )); + insns.push(( + Inst::LockCmpxchg { + ty: types::I8, + src: rsi, + dst: am2.clone(), + srcloc: None, + }, + "F0400FB0B4F1C7CFFFFF", + "lock cmpxchgb %sil, -12345(%rcx,%rsi,8)", + )); + insns.push(( + Inst::LockCmpxchg { + ty: types::I8, + src: r10, + dst: am2.clone(), + srcloc: None, + }, + "F0440FB094F1C7CFFFFF", + "lock cmpxchgb %r10b, -12345(%rcx,%rsi,8)", + )); + insns.push(( + Inst::LockCmpxchg { + ty: types::I8, + src: r15, + dst: am2.clone(), + srcloc: None, + }, + "F0440FB0BCF1C7CFFFFF", + "lock cmpxchgb %r15b, -12345(%rcx,%rsi,8)", + )); + // 16 bit cases + insns.push(( + Inst::LockCmpxchg { + ty: types::I16, + src: rsi, + dst: am2.clone(), + srcloc: None, + }, + "66F00FB1B4F1C7CFFFFF", + "lock cmpxchgw %si, -12345(%rcx,%rsi,8)", + )); + insns.push(( + Inst::LockCmpxchg { + ty: types::I16, + src: r10, + dst: am2.clone(), + srcloc: None, + }, + "66F0440FB194F1C7CFFFFF", + "lock cmpxchgw %r10w, -12345(%rcx,%rsi,8)", + )); + // 32 bit cases + insns.push(( + Inst::LockCmpxchg { + ty: types::I32, + src: rsi, + dst: am2.clone(), + srcloc: None, + }, + "F00FB1B4F1C7CFFFFF", + "lock cmpxchgl %esi, -12345(%rcx,%rsi,8)", + )); + insns.push(( + Inst::LockCmpxchg { + ty: types::I32, + src: r10, + dst: am2.clone(), + srcloc: None, + }, + "F0440FB194F1C7CFFFFF", + "lock cmpxchgl %r10d, -12345(%rcx,%rsi,8)", + )); + // 64 bit cases + insns.push(( + Inst::LockCmpxchg { + ty: types::I64, + src: rsi, + dst: am2.clone(), + srcloc: None, + }, + "F0480FB1B4F1C7CFFFFF", + "lock cmpxchgq %rsi, -12345(%rcx,%rsi,8)", + )); + insns.push(( + Inst::LockCmpxchg { + ty: types::I64, + src: r10, + dst: am2.clone(), + srcloc: None, + }, + "F04C0FB194F1C7CFFFFF", + "lock cmpxchgq %r10, -12345(%rcx,%rsi,8)", + )); + + // AtomicRmwSeq + insns.push(( + Inst::AtomicRmwSeq { ty: types::I8, op: 
inst_common::AtomicRmwOp::Or, srcloc: None }, + "490FB6014989C34D09D3F0450FB0190F85EFFFFFFF", + "atomically { 8_bits_at_[%r9]) Or= %r10; %rax = old_value_at_[%r9]; %r11, %rflags = trash }" + )); + insns.push(( + Inst::AtomicRmwSeq { ty: types::I16, op: inst_common::AtomicRmwOp::And, srcloc: None }, + "490FB7014989C34D21D366F0450FB1190F85EEFFFFFF", + "atomically { 16_bits_at_[%r9]) And= %r10; %rax = old_value_at_[%r9]; %r11, %rflags = trash }" + )); + insns.push(( + Inst::AtomicRmwSeq { ty: types::I32, op: inst_common::AtomicRmwOp::Xchg, srcloc: None }, + "418B014989C34D89D3F0450FB1190F85EFFFFFFF", + "atomically { 32_bits_at_[%r9]) Xchg= %r10; %rax = old_value_at_[%r9]; %r11, %rflags = trash }" + )); + insns.push(( + Inst::AtomicRmwSeq { ty: types::I64, op: inst_common::AtomicRmwOp::Add, srcloc: None }, + "498B014989C34D01D3F04D0FB1190F85EFFFFFFF", + "atomically { 64_bits_at_[%r9]) Add= %r10; %rax = old_value_at_[%r9]; %r11, %rflags = trash }" + )); + + // Fence + insns.push(( + Inst::Fence { + kind: FenceKind::MFence, + }, + "0FAEF0", + "mfence", + )); + insns.push(( + Inst::Fence { + kind: FenceKind::LFence, + }, + "0FAEE8", + "lfence", + )); + insns.push(( + Inst::Fence { + kind: FenceKind::SFence, + }, + "0FAEF8", + "sfence", + )); + // ======================================================== // Misc instructions. diff --git a/cranelift/codegen/src/isa/x64/inst/mod.rs b/cranelift/codegen/src/isa/x64/inst/mod.rs index 712a9b508e..da2dca2060 100644 --- a/cranelift/codegen/src/isa/x64/inst/mod.rs +++ b/cranelift/codegen/src/isa/x64/inst/mod.rs @@ -404,6 +404,56 @@ pub enum Inst { offset: i64, }, + // ===================================== + // Instructions pertaining to atomic memory accesses. + /// A standard (native) `lock cmpxchg src, (amode)`, with register conventions: + /// + /// `dst` (read) address + /// `src` (read) replacement value + /// %rax (modified) in: expected value, out: value that was actually at `dst` + /// %rflags is written. 
Do not assume anything about it after the instruction.
+ ///
+ /// The instruction "succeeded" iff the lowest `ty` bits of %rax afterwards are the same as
+ /// they were before.
+ LockCmpxchg {
+ ty: Type, // I8, I16, I32 or I64
+ src: Reg,
+ dst: SyntheticAmode,
+ srcloc: Option<SourceLoc>,
+ },
+
+ /// A synthetic instruction, based on a loop around a native `lock cmpxchg` instruction.
+ /// This atomically modifies a value in memory and returns the old value. The sequence
+ /// consists of an initial "normal" load from `dst`, followed by a loop which computes the
+ /// new value and tries to compare-and-swap ("CAS") it into `dst`, using the native
+ /// instruction `lock cmpxchg{b,w,l,q}` . The loop iterates until the CAS is successful.
+ /// If there is no contention, there will be only one pass through the loop body. The
+ /// sequence does *not* perform any explicit memory fence instructions
+ /// (mfence/sfence/lfence).
+ ///
+ /// Note that the transaction is atomic in the sense that, as observed by some other thread,
+ /// `dst` either has the initial or final value, but no other. It isn't atomic in the sense
+ /// of guaranteeing that no other thread writes to `dst` in between the initial load and the
+ /// CAS -- but that would cause the CAS to fail unless the other thread's last write before
+ /// the CAS wrote the same value that was already there. In other words, this
+ /// implementation suffers (unavoidably) from the A-B-A problem.
+ ///
+ /// This instruction sequence has fixed register uses as follows:
+ ///
+ /// %r9 (read) address
+ /// %r10 (read) second operand for `op`
+ /// %r11 (written) scratch reg; value afterwards has no meaning
+ /// %rax (written) the old value at %r9
+ /// %rflags is written. Do not assume anything about it after the instruction.
+ AtomicRmwSeq {
+ ty: Type, // I8, I16, I32 or I64
+ op: inst_common::AtomicRmwOp,
+ srcloc: Option<SourceLoc>,
+ },
+
+ /// A memory fence (mfence, lfence or sfence).
+ Fence { kind: FenceKind }, + // ===================================== // Meta-instructions generating no code. /// Marker, no-op in generated code: SP "virtual offset" is adjusted. This @@ -1521,6 +1571,26 @@ impl ShowWithRRU for Inst { show_ireg_sized(dst.to_reg(), mb_rru, 8), ), + Inst::LockCmpxchg { ty, src, dst, .. } => { + let size = ty.bytes() as u8; + format!("lock cmpxchg{} {}, {}", + suffixBWLQ(size), show_ireg_sized(*src, mb_rru, size), dst.show_rru(mb_rru)) + } + + Inst::AtomicRmwSeq { ty, op, .. } => { + format!( + "atomically {{ {}_bits_at_[%r9]) {:?}= %r10; %rax = old_value_at_[%r9]; %r11, %rflags = trash }}", + ty.bits(), op) + }, + + Inst::Fence { kind } => { + match kind { + FenceKind::MFence => "mfence".to_string(), + FenceKind::LFence => "lfence".to_string(), + FenceKind::SFence => "sfence".to_string(), + } + } + Inst::VirtualSPOffsetAdj { offset } => format!("virtual_sp_offset_adjust {}", offset), Inst::Hlt => "hlt".into(), @@ -1737,6 +1807,19 @@ fn x64_get_regs(inst: &Inst, collector: &mut RegUsageCollector) { collector.add_def(*dst); } + Inst::LockCmpxchg { src, dst, .. } => { + dst.get_regs_as_uses(collector); + collector.add_use(*src); + collector.add_mod(Writable::from_reg(regs::rax())); + } + + Inst::AtomicRmwSeq { .. } => { + collector.add_use(regs::r9()); + collector.add_use(regs::r10()); + collector.add_def(Writable::from_reg(regs::r11())); + collector.add_def(Writable::from_reg(regs::rax())); + } + Inst::Ret | Inst::EpiloguePlaceholder | Inst::JmpKnown { .. } @@ -1745,7 +1828,8 @@ fn x64_get_regs(inst: &Inst, collector: &mut RegUsageCollector) { | Inst::TrapIf { .. } | Inst::VirtualSPOffsetAdj { .. } | Inst::Hlt - | Inst::Ud2 { .. } => { + | Inst::Ud2 { .. } + | Inst::Fence { .. } => { // No registers are used. } } @@ -2091,6 +2175,15 @@ fn x64_map_regs(inst: &mut Inst, mapper: &RUM) { Inst::LoadExtName { ref mut dst, .. } => map_def(mapper, dst), + Inst::LockCmpxchg { + ref mut src, + ref mut dst, + .. 
+ } => {
+ map_use(mapper, src);
+ dst.map_uses(mapper);
+ }
+
 Inst::Ret
 | Inst::EpiloguePlaceholder
 | Inst::JmpKnown { .. }
@@ -2099,8 +2192,11 @@ fn x64_map_regs<RUM: RegUsageMapper>(inst: &mut Inst, mapper: &RUM) {
 | Inst::TrapIf { .. }
 | Inst::VirtualSPOffsetAdj { .. }
 | Inst::Ud2 { .. }
- | Inst::Hlt => {
- // No registers are used.
+ | Inst::Hlt
+ | Inst::AtomicRmwSeq { .. }
+ | Inst::Fence { .. } => {
+ // Instruction doesn't explicitly mention any regs, so it can't have any virtual
+ // regs that we'd need to remap. Hence no action required.
 }
 }
}
diff --git a/cranelift/codegen/src/isa/x64/lower.rs b/cranelift/codegen/src/isa/x64/lower.rs
index f4eb306882..1b494db706 100644
--- a/cranelift/codegen/src/isa/x64/lower.rs
+++ b/cranelift/codegen/src/isa/x64/lower.rs
@@ -2,6 +2,7 @@
 #![allow(non_snake_case)]
+use crate::ir;
 use crate::ir::{
 condcodes::FloatCC, condcodes::IntCC, types, AbiParam, ArgumentPurpose, ExternalName,
 Inst as IRInst, InstructionData, LibCall, Opcode, Signature, TrapCode, Type,
@@ -45,6 +46,14 @@ fn is_bool_ty(ty: Type) -> bool {
 }
 }
+/// This is target-word-size dependent. And it excludes booleans and reftypes.
+fn is_valid_atomic_transaction_ty(ty: Type) -> bool {
+ match ty {
+ types::I8 | types::I16 | types::I32 | types::I64 => true,
+ _ => false,
+ }
+}
+
 fn iri_to_u64_imm(ctx: Ctx, inst: IRInst) -> Option<u64> {
 ctx.get_constant(inst)
}
@@ -82,6 +91,13 @@ fn inst_fp_condcode(data: &InstructionData) -> Option<FloatCC> {
 }
}
+fn inst_atomic_rmw_op(data: &InstructionData) -> Option<ir::AtomicRmwOp> {
+ match data {
+ &InstructionData::AtomicRmw { op, .. } => Some(op),
+ _ => None,
+ }
+}
+
 fn ldst_offset(data: &InstructionData) -> Option<i32> {
 match data {
 &InstructionData::Load { offset, .. }
@@ -1732,6 +1748,148 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
 });
 }
+ Opcode::AtomicRmw => {
+ // This is a simple, general-case atomic update, based on a loop involving
+ // `cmpxchg`. 
Note that we could do much better than this in the case where the old + // value at the location (that is to say, the SSA `Value` computed by this CLIF + // instruction) is not required. In that case, we could instead implement this + // using a single `lock`-prefixed x64 read-modify-write instruction. Also, even in + // the case where the old value is required, for the `add` and `sub` cases, we can + // use the single instruction `lock xadd`. However, those improvements have been + // left for another day. + // TODO: filed as https://github.com/bytecodealliance/wasmtime/issues/2153 + let dst = output_to_reg(ctx, outputs[0]); + let mut addr = input_to_reg(ctx, inputs[0]); + let mut arg2 = input_to_reg(ctx, inputs[1]); + let ty_access = ty.unwrap(); + assert!(is_valid_atomic_transaction_ty(ty_access)); + let memflags = ctx.memflags(insn).expect("memory flags"); + let srcloc = if !memflags.notrap() { + Some(ctx.srcloc(insn)) + } else { + None + }; + // Make sure that both args are in virtual regs, since in effect we have to do a + // parallel copy to get them safely to the AtomicRmwSeq input regs, and that's not + // guaranteed safe if either is in a real reg. + addr = ctx.ensure_in_vreg(addr, types::I64); + arg2 = ctx.ensure_in_vreg(arg2, types::I64); + // Move the args to the preordained AtomicRMW input regs. Note that `AtomicRmwSeq` + // operates at whatever width is specified by `ty`, so there's no need to + // zero-extend `arg2` in the case of `ty` being I8/I16/I32. + ctx.emit(Inst::gen_move( + Writable::from_reg(regs::r9()), + addr, + types::I64, + )); + ctx.emit(Inst::gen_move( + Writable::from_reg(regs::r10()), + arg2, + types::I64, + )); + // Now the AtomicRmwSeq (pseudo-) instruction itself + let op = inst_common::AtomicRmwOp::from(inst_atomic_rmw_op(ctx.data(insn)).unwrap()); + ctx.emit(Inst::AtomicRmwSeq { + ty: ty_access, + op, + srcloc, + }); + // And finally, copy the preordained AtomicRmwSeq output reg to its destination. 
+ ctx.emit(Inst::gen_move(dst, regs::rax(), types::I64)); + } + + Opcode::AtomicCas => { + // This is very similar to, but not identical to, the `AtomicRmw` case. As with + // `AtomicRmw`, there's no need to zero-extend narrow values here. + let dst = output_to_reg(ctx, outputs[0]); + let addr = input_to_reg(ctx, inputs[0]); + let expected = input_to_reg(ctx, inputs[1]); + let replacement = input_to_reg(ctx, inputs[2]); + let ty_access = ty.unwrap(); + assert!(is_valid_atomic_transaction_ty(ty_access)); + let memflags = ctx.memflags(insn).expect("memory flags"); + let srcloc = if !memflags.notrap() { + Some(ctx.srcloc(insn)) + } else { + None + }; + // Move the expected value into %rax. Because there's only one fixed register on + // the input side, we don't have to use `ensure_in_vreg`, as is necessary in the + // `AtomicRmw` case. + ctx.emit(Inst::gen_move( + Writable::from_reg(regs::rax()), + expected, + types::I64, + )); + ctx.emit(Inst::LockCmpxchg { + ty: ty_access, + src: replacement, + dst: Amode::imm_reg(0, addr).into(), + srcloc, + }); + // And finally, copy the old value at the location to its destination reg. + ctx.emit(Inst::gen_move(dst, regs::rax(), types::I64)); + } + + Opcode::AtomicLoad => { + // This is a normal load. The x86-TSO memory model provides sufficient sequencing + // to satisfy the CLIF synchronisation requirements for `AtomicLoad` without the + // need for any fence instructions. + let data = output_to_reg(ctx, outputs[0]); + let addr = input_to_reg(ctx, inputs[0]); + let ty_access = ty.unwrap(); + assert!(is_valid_atomic_transaction_ty(ty_access)); + let memflags = ctx.memflags(insn).expect("memory flags"); + let srcloc = if !memflags.notrap() { + Some(ctx.srcloc(insn)) + } else { + None + }; + // For the amode, we could do better, but for now just use `0(addr)`. 
+ let rm = RegMem::mem(Amode::imm_reg(0, addr)); + if ty_access == types::I64 { + ctx.emit(Inst::mov64_rm_r(rm, data, srcloc)); + } else { + let ext_mode = match ty_access { + types::I8 => ExtMode::BQ, + types::I16 => ExtMode::WQ, + types::I32 => ExtMode::LQ, + _ => panic!("lowering AtomicLoad: invalid type"), + }; + ctx.emit(Inst::movzx_rm_r(ext_mode, rm, data, srcloc)); + } + } + + Opcode::AtomicStore => { + // This is a normal store, followed by an `mfence` instruction. + let data = input_to_reg(ctx, inputs[0]); + let addr = input_to_reg(ctx, inputs[1]); + let ty_access = ctx.input_ty(insn, 0); + assert!(is_valid_atomic_transaction_ty(ty_access)); + let memflags = ctx.memflags(insn).expect("memory flags"); + let srcloc = if !memflags.notrap() { + Some(ctx.srcloc(insn)) + } else { + None + }; + // For the amode, we could do better, but for now just use `0(addr)`. + ctx.emit(Inst::mov_r_m( + ty_access.bytes() as u8, + data, + Amode::imm_reg(0, addr), + srcloc, + )); + ctx.emit(Inst::Fence { + kind: FenceKind::MFence, + }); + } + + Opcode::Fence => { + ctx.emit(Inst::Fence { + kind: FenceKind::MFence, + }); + } + Opcode::FuncAddr => { let dst = output_to_reg(ctx, outputs[0]); let (extname, _) = ctx.call_target(insn).unwrap(); diff --git a/cranelift/codegen/src/machinst/inst_common.rs b/cranelift/codegen/src/machinst/inst_common.rs new file mode 100644 index 0000000000..9566c56e53 --- /dev/null +++ b/cranelift/codegen/src/machinst/inst_common.rs @@ -0,0 +1,36 @@ +//! A place to park MachInst::Inst fragments which are common across multiple architectures. + +use crate::ir; + +/// Atomic memory update operations. As of 21 Aug 2020 these are used for the aarch64 and x64 +/// targets. 
+#[derive(Clone, Copy, Debug, PartialEq, Eq)] +#[repr(u8)] +pub enum AtomicRmwOp { + /// Add + Add, + /// Sub + Sub, + /// And + And, + /// Or + Or, + /// Exclusive Or + Xor, + /// Exchange (swap operands) + Xchg, +} + +impl AtomicRmwOp { + /// Converts an `ir::AtomicRmwOp` to the corresponding `inst_common::AtomicRmwOp`. + pub fn from(ir_op: ir::AtomicRmwOp) -> Self { + match ir_op { + ir::AtomicRmwOp::Add => AtomicRmwOp::Add, + ir::AtomicRmwOp::Sub => AtomicRmwOp::Sub, + ir::AtomicRmwOp::And => AtomicRmwOp::And, + ir::AtomicRmwOp::Or => AtomicRmwOp::Or, + ir::AtomicRmwOp::Xor => AtomicRmwOp::Xor, + ir::AtomicRmwOp::Xchg => AtomicRmwOp::Xchg, + } + } +} diff --git a/cranelift/codegen/src/machinst/mod.rs b/cranelift/codegen/src/machinst/mod.rs index b8ec275133..915764436e 100644 --- a/cranelift/codegen/src/machinst/mod.rs +++ b/cranelift/codegen/src/machinst/mod.rs @@ -133,6 +133,8 @@ pub mod adapter; pub use adapter::*; pub mod helpers; pub use helpers::*; +pub mod inst_common; +pub use inst_common::*; /// A machine instruction. pub trait MachInst: Clone + Debug {