diff --git a/cranelift/codegen/meta/src/shared/instructions.rs b/cranelift/codegen/meta/src/shared/instructions.rs
index 04cbc87065..ffded34f95 100644
--- a/cranelift/codegen/meta/src/shared/instructions.rs
+++ b/cranelift/codegen/meta/src/shared/instructions.rs
@@ -4600,7 +4600,8 @@ pub(crate) fn define(
             r#"
         Atomically load from memory at `p`.
 
-        It should only be used for integer types with 32 or 64 bits.
+        This is a polymorphic instruction that can load any value type which has a memory
+        representation. It should only be used for integer types with 8, 16, 32 or 64 bits.
         This operation is sequentially consistent and creates happens-before edges that order
         normal (non-atomic) loads and stores.
         "#,
@@ -4612,124 +4613,14 @@ pub(crate) fn define(
         .other_side_effects(true),
     );
 
-    ig.push(
-        Inst::new(
-            "atomic_uload8",
-            r#"
-        Atomically load 8 bits from memory at `p` and zero-extend to either 32 or 64 bits.
-
-        This is equivalent to ``load.i8`` followed by ``uextend``.
-
-        This operation is sequentially consistent and creates happens-before edges that order
-        normal (non-atomic) loads and stores.
-        "#,
-            &formats.load_no_offset,
-        )
-        .operands_in(vec![MemFlags, p])
-        .operands_out(vec![a])
-        .can_load(true)
-        .other_side_effects(true),
-    );
-
-    ig.push(
-        Inst::new(
-            "atomic_uload16",
-            r#"
-        Atomically load 16 bits from memory at `p` and zero-extend to either 32 or 64 bits.
-
-        This is equivalent to ``load.i16`` followed by ``uextend``.
-
-        This operation is sequentially consistent and creates
-        happens-before edges that order normal (non-atomic) loads and stores.
-        "#,
-            &formats.load_no_offset,
-        )
-        .operands_in(vec![MemFlags, p])
-        .operands_out(vec![a])
-        .can_load(true)
-        .other_side_effects(true),
-    );
-
-    ig.push(
-        Inst::new(
-            "atomic_uload32",
-            r#"
-        Atomically load 32 bits from memory at `p` and zero-extend to 64 bits.
-
-        This is equivalent to ``load.i32`` followed by ``uextend``.
-
-        This operation is sequentially consistent and creates
-        happens-before edges that order normal (non-atomic) loads and stores.
-        "#,
-            &formats.load_no_offset,
-        )
-        .operands_in(vec![MemFlags, p])
-        .operands_out(vec![a])
-        .can_load(true)
-        .other_side_effects(true),
-    );
-
     ig.push(
         Inst::new(
             "atomic_store",
             r#"
         Atomically store `x` to memory at `p`.
 
-        This is a polymorphic instruction that can store a 32 or 64-bit value.
-        This operation is sequentially consistent and creates happens-before edges that order
-        normal (non-atomic) loads and stores.
-        "#,
-            &formats.store_no_offset,
-        )
-        .operands_in(vec![MemFlags, x, p])
-        .can_store(true)
-        .other_side_effects(true),
-    );
-
-    ig.push(
-        Inst::new(
-            "atomic_store8",
-            r#"
-        Atomically store the low 8 bits of `x` to memory at `p`.
-
-        This is equivalent to ``ireduce.i8`` followed by ``store.i8``.
-
-        This operation is sequentially consistent and creates happens-before edges that order
-        normal (non-atomic) loads and stores.
-        "#,
-            &formats.store_no_offset,
-        )
-        .operands_in(vec![MemFlags, x, p])
-        .can_store(true)
-        .other_side_effects(true),
-    );
-
-    ig.push(
-        Inst::new(
-            "atomic_store16",
-            r#"
-        Atomically store the low 16 bits of `x` to memory at `p`.
-
-        This is equivalent to ``ireduce.i16`` followed by ``store.i16``.
-
-        This operation is sequentially consistent and creates happens-before edges that order
-        normal (non-atomic) loads and stores.
-        "#,
-            &formats.store_no_offset,
-        )
-        .operands_in(vec![MemFlags, x, p])
-        .can_store(true)
-        .other_side_effects(true),
-    );
-
-    ig.push(
-        Inst::new(
-            "atomic_store32",
-            r#"
-        Atomically store the low 32 bits of `x` to memory at `p`.
-
-        This is equivalent to ``ireduce.i32`` followed by ``store.i32``.
-
+        This is a polymorphic instruction that can store any value type with a memory
+        representation. It should only be used for integer types with 8, 16, 32 or 64 bits.
         This operation is sequentially consistent and creates happens-before edges that order
         normal (non-atomic) loads and stores.
         "#,
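None of the zero-extending or truncating behaviour is lost: a producer that previously emitted one of the removed instructions now writes the equivalent two-instruction sequence that the removed docs already named, and the updated filetests below check exactly these shapes. An illustrative CLIF sketch (function names invented for this note; not part of the patch):

function %uload16_equiv(i64) -> i64 {
block0(v0: i64):
    v1 = atomic_load.i16 v0      ; formerly: v1 = atomic_uload16.i64 v0
    v2 = uextend.i64 v1
    return v2
}

function %store8_equiv(i32, i64) {
block0(v0: i32, v1: i64):
    v2 = ireduce.i8 v0           ; formerly: atomic_store8.i32 v0, v1
    atomic_store.i8 v2, v1
    return
}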
- "#, - &formats.store_no_offset, - ) - .operands_in(vec![MemFlags, x, p]) - .can_store(true) - .other_side_effects(true), - ); - - ig.push( - Inst::new( - "atomic_store32", - r#" - Atomically store the low 32 bits of `x` to memory at `p`. - - This is equivalent to ``ireduce.i32`` followed by ``store.i32``. - + This is a polymorphic instruction that can store any value type with a memory + representation. It should only be used for integer types with 8, 16, 32 or 64 bits. This operation is sequentially consistent and creates happens-before edges that order normal (non-atomic) loads and stores. "#, diff --git a/cranelift/codegen/src/isa/aarch64/lower.rs b/cranelift/codegen/src/isa/aarch64/lower.rs index 12535cf382..6e64654568 100644 --- a/cranelift/codegen/src/isa/aarch64/lower.rs +++ b/cranelift/codegen/src/isa/aarch64/lower.rs @@ -1740,6 +1740,22 @@ pub(crate) fn is_valid_atomic_transaction_ty(ty: Type) -> bool { } } +pub(crate) fn emit_atomic_load>( + ctx: &mut C, + rt: Writable, + insn: IRInst, +) { + assert!(ctx.data(insn).opcode() == Opcode::AtomicLoad); + let inputs = insn_inputs(ctx, insn); + let rn = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None); + let access_ty = ctx.output_ty(insn, 0); + assert!(is_valid_atomic_transaction_ty(access_ty)); + // We're ignoring the result type of the load because the LoadAcquire will + // explicitly zero extend to the nearest word, and also zero the high half + // of an X register. + ctx.emit(Inst::LoadAcquire { access_ty, rt, rn }); +} + fn load_op_to_ty(op: Opcode) -> Option { match op { Opcode::Sload8 | Opcode::Uload8 | Opcode::Sload8Complex | Opcode::Uload8Complex => Some(I8), diff --git a/cranelift/codegen/src/isa/aarch64/lower_inst.rs b/cranelift/codegen/src/isa/aarch64/lower_inst.rs index 8a4df2026b..3ddc3712a0 100644 --- a/cranelift/codegen/src/isa/aarch64/lower_inst.rs +++ b/cranelift/codegen/src/isa/aarch64/lower_inst.rs @@ -521,6 +521,19 @@ pub(crate) fn lower_insn_to_regs>( } Opcode::Uextend | Opcode::Sextend => { + if op == Opcode::Uextend { + let inputs = ctx.get_input_as_source_or_const(inputs[0].insn, inputs[0].input); + if let Some((atomic_load, 0)) = inputs.inst { + if ctx.data(atomic_load).opcode() == Opcode::AtomicLoad { + let output_ty = ty.unwrap(); + assert!(output_ty == I32 || output_ty == I64); + let rt = get_output_reg(ctx, outputs[0]).only_reg().unwrap(); + emit_atomic_load(ctx, rt, atomic_load); + ctx.sink_inst(atomic_load); + return Ok(()); + } + } + } let output_ty = ty.unwrap(); let input_ty = ctx.input_ty(insn, 0); let from_bits = ty_bits(input_ty) as u8; @@ -1522,38 +1535,15 @@ pub(crate) fn lower_insn_to_regs>( } } - Opcode::AtomicLoad - | Opcode::AtomicUload8 - | Opcode::AtomicUload16 - | Opcode::AtomicUload32 => { + Opcode::AtomicLoad => { let rt = get_output_reg(ctx, outputs[0]).only_reg().unwrap(); - let rn = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None); - let ty = ty.unwrap(); - let access_ty = match op { - Opcode::AtomicLoad => ty, - Opcode::AtomicUload8 => I8, - Opcode::AtomicUload16 => I16, - Opcode::AtomicUload32 => I32, - _ => panic!(), - }; - assert!(is_valid_atomic_transaction_ty(access_ty)); - ctx.emit(Inst::LoadAcquire { access_ty, rt, rn }); + emit_atomic_load(ctx, rt, insn); } - Opcode::AtomicStore - | Opcode::AtomicStore32 - | Opcode::AtomicStore16 - | Opcode::AtomicStore8 => { + Opcode::AtomicStore => { let rt = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None); let rn = put_input_in_reg(ctx, inputs[1], NarrowValueMode::None); - let ty = ctx.input_ty(insn, 0); - let 
diff --git a/cranelift/codegen/src/isa/s390x/lower.rs b/cranelift/codegen/src/isa/s390x/lower.rs
index 8ff375788a..b13edc4bb2 100644
--- a/cranelift/codegen/src/isa/s390x/lower.rs
+++ b/cranelift/codegen/src/isa/s390x/lower.rs
@@ -2734,61 +2734,37 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
                 ctx.emit(Inst::AtomicCas64 { rd, rn, mem });
             }
         }
-        Opcode::AtomicLoad
-        | Opcode::AtomicUload8
-        | Opcode::AtomicUload16
-        | Opcode::AtomicUload32 => {
+        Opcode::AtomicLoad => {
             let flags = ctx.memflags(insn).unwrap();
             let endianness = flags.endianness(Endianness::Big);
             let ty = ty.unwrap();
-            let access_ty = match op {
-                Opcode::AtomicLoad => ty,
-                Opcode::AtomicUload8 => types::I8,
-                Opcode::AtomicUload16 => types::I16,
-                Opcode::AtomicUload32 => types::I32,
-                _ => unreachable!(),
-            };
-            assert!(is_valid_atomic_transaction_ty(access_ty));
+            assert!(is_valid_atomic_transaction_ty(ty));
             let mem = lower_address(ctx, &inputs[..], 0, flags);
             let rd = get_output_reg(ctx, outputs[0]).only_reg().unwrap();
             if endianness == Endianness::Big {
-                ctx.emit(match (ty_bits(access_ty), ty_bits(ty)) {
-                    (8, 32) => Inst::Load32ZExt8 { rd, mem },
-                    (8, 64) => Inst::Load64ZExt8 { rd, mem },
-                    (16, 32) => Inst::Load32ZExt16 { rd, mem },
-                    (16, 64) => Inst::Load64ZExt16 { rd, mem },
-                    (32, 32) => Inst::Load32 { rd, mem },
-                    (32, 64) => Inst::Load64ZExt32 { rd, mem },
-                    (64, 64) => Inst::Load64 { rd, mem },
+                ctx.emit(match ty_bits(ty) {
+                    8 => Inst::Load32ZExt8 { rd, mem },
+                    16 => Inst::Load32ZExt16 { rd, mem },
+                    32 => Inst::Load32 { rd, mem },
+                    64 => Inst::Load64 { rd, mem },
                     _ => panic!("Unsupported size in load"),
                 });
             } else {
-                ctx.emit(match (ty_bits(access_ty), ty_bits(ty)) {
-                    (8, 32) => Inst::Load32ZExt8 { rd, mem },
-                    (8, 64) => Inst::Load64ZExt8 { rd, mem },
-                    (16, 32) => Inst::LoadRev16 { rd, mem },
-                    (32, 32) => Inst::LoadRev32 { rd, mem },
-                    (64, 64) => Inst::LoadRev64 { rd, mem },
+                ctx.emit(match ty_bits(ty) {
+                    8 => Inst::Load32ZExt8 { rd, mem },
+                    16 => Inst::LoadRev16 { rd, mem },
+                    32 => Inst::LoadRev32 { rd, mem },
+                    64 => Inst::LoadRev64 { rd, mem },
                     _ => panic!("Unsupported size in load"),
                 });
             }
         }
-        Opcode::AtomicStore
-        | Opcode::AtomicStore32
-        | Opcode::AtomicStore16
-        | Opcode::AtomicStore8 => {
+        Opcode::AtomicStore => {
             let flags = ctx.memflags(insn).unwrap();
             let endianness = flags.endianness(Endianness::Big);
-            let data_ty = ctx.input_ty(insn, 0);
-            let ty = match op {
-                Opcode::AtomicStore => data_ty,
-                Opcode::AtomicStore32 => types::I32,
-                Opcode::AtomicStore16 => types::I16,
-                Opcode::AtomicStore8 => types::I8,
-                _ => unreachable!(),
-            };
+            let ty = ctx.input_ty(insn, 0);
             assert!(is_valid_atomic_transaction_ty(ty));
             let mem = lower_address(ctx, &inputs[1..], 0, flags);
diff --git a/cranelift/codegen/src/isa/x64/lower.rs b/cranelift/codegen/src/isa/x64/lower.rs
index 19433dc71e..b4c05cee8f 100644
--- a/cranelift/codegen/src/isa/x64/lower.rs
+++ b/cranelift/codegen/src/isa/x64/lower.rs
@@ -5825,10 +5825,7 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
             ctx.emit(Inst::gen_move(dst, regs::rax(), types::I64));
         }
 
-        Opcode::AtomicLoad
-        | Opcode::AtomicUload8
-        | Opcode::AtomicUload16
-        | Opcode::AtomicUload32 => {
+        Opcode::AtomicLoad => {
             // This is a normal load. The x86-TSO memory model provides sufficient sequencing
             // to satisfy the CLIF synchronisation requirements for `AtomicLoad` without the
             // need for any fence instructions.
@@ -5850,21 +5847,11 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
             }
         }
 
-        Opcode::AtomicStore
-        | Opcode::AtomicStore32
-        | Opcode::AtomicStore16
-        | Opcode::AtomicStore8 => {
+        Opcode::AtomicStore => {
             // This is a normal store, followed by an `mfence` instruction.
             let data = put_input_in_reg(ctx, inputs[0]);
             let addr = lower_to_amode(ctx, inputs[1], 0);
-            let data_ty = ctx.input_ty(insn, 0);
-            let ty_access = match op {
-                Opcode::AtomicStore => data_ty,
-                Opcode::AtomicStore32 => types::I32,
-                Opcode::AtomicStore16 => types::I16,
-                Opcode::AtomicStore8 => types::I8,
-                _ => unreachable!(),
-            };
+            let ty_access = ctx.input_ty(insn, 0);
             assert!(is_valid_atomic_transaction_ty(ty_access));
 
             ctx.emit(Inst::store(ty_access, data, addr));
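On x64 the same shape falls out of x86-TSO: a sequentially consistent atomic_load lowers to a plain (zero-extending) mov with no fence, and an atomic_store to a plain mov followed by mfence; narrow widths now arrive as ordinary i8/i16 accesses instead of dedicated opcodes. An illustrative sketch of the store side (assumed lowering per the code above; not one of this patch's filetests):

function %seq_cst_store_i8(i32, i64) {
block0(v0: i32, v1: i64):
    v2 = ireduce.i8 v0           ; no machine instruction needed for the truncation
    atomic_store.i8 v2, v1       ; expected: a byte-sized mov, then mfence
    return
}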
diff --git a/cranelift/filetests/filetests/isa/aarch64/atomic_load.clif b/cranelift/filetests/filetests/isa/aarch64/atomic_load.clif
index 31af721015..cc69eef05c 100644
--- a/cranelift/filetests/filetests/isa/aarch64/atomic_load.clif
+++ b/cranelift/filetests/filetests/isa/aarch64/atomic_load.clif
@@ -21,50 +21,75 @@ block0(v0: i64):
 ; nextln: ldp fp, lr, [sp], #16
 ; nextln: ret
 
-function %atomic_uload_i32_i64(i64) -> i64 {
+function %atomic_load_i16(i64) -> i16 {
 block0(v0: i64):
-    v1 = atomic_uload32.i64 v0
+    v1 = atomic_load.i16 v0
     return v1
 }
 
+; check: ldarh w0, [x0]
+; nextln: ldp fp, lr, [sp], #16
+; nextln: ret
+
+function %atomic_load_i8(i64) -> i8 {
+block0(v0: i64):
+    v1 = atomic_load.i8 v0
+    return v1
+}
+
+; check: ldarb w0, [x0]
+; nextln: ldp fp, lr, [sp], #16
+; nextln: ret
+
+function %atomic_load_i32_i64(i64) -> i64 {
+block0(v0: i64):
+    v1 = atomic_load.i32 v0
+    v2 = uextend.i64 v1
+    return v2
+}
+
 ; check: ldar w0, [x0]
 ; nextln: ldp fp, lr, [sp], #16
 ; nextln: ret
 
-function %atomic_uload_i16_i32(i64) -> i32 {
+function %atomic_load_i16_i64(i64) -> i64 {
 block0(v0: i64):
-    v1 = atomic_uload16.i32 v0
-    return v1
+    v1 = atomic_load.i16 v0
+    v2 = uextend.i64 v1
+    return v2
 }
 
 ; check: ldarh w0, [x0]
 ; nextln: ldp fp, lr, [sp], #16
 ; nextln: ret
 
-function %atomic_uload_i16_i64(i64) -> i64 {
+function %atomic_load_i8_i64(i64) -> i64 {
 block0(v0: i64):
-    v1 = atomic_uload16.i64 v0
-    return v1
+    v1 = atomic_load.i8 v0
+    v2 = uextend.i64 v1
+    return v2
+}
+
+; check: ldarb w0, [x0]
+; nextln: ldp fp, lr, [sp], #16
+; nextln: ret
+
+function %atomic_load_i16_i32(i64) -> i32 {
+block0(v0: i64):
+    v1 = atomic_load.i16 v0
+    v2 = uextend.i32 v1
+    return v2
 }
 
 ; check: ldarh w0, [x0]
 ; nextln: ldp fp, lr, [sp], #16
 ; nextln: ret
 
-function %atomic_uload_i8_i32(i64) -> i32 {
+function %atomic_load_i8_i32(i64) -> i32 {
 block0(v0: i64):
-    v1 = atomic_uload8.i32 v0
-    return v1
-}
-
-; check: ldarb w0, [x0]
-; nextln: ldp fp, lr, [sp], #16
-; nextln: ret
-
-function %atomic_uload_i8_i64(i64) -> i64 {
-block0(v0: i64):
-    v1 = atomic_uload8.i64 v0
-    return v1
+    v1 = atomic_load.i8 v0
+    v2 = uextend.i32 v1
+    return v2
 }
 
 ; check: ldarb w0, [x0]
diff --git a/cranelift/filetests/filetests/isa/aarch64/atomic_store.clif b/cranelift/filetests/filetests/isa/aarch64/atomic_store.clif
index 9c0cd529d7..89d3745278 100644
--- a/cranelift/filetests/filetests/isa/aarch64/atomic_store.clif
+++ b/cranelift/filetests/filetests/isa/aarch64/atomic_store.clif
@@ -21,52 +21,82 @@ block0(v0: i32, v1: i64):
 ; nextln: ldp fp, lr, [sp], #16
 ; nextln: ret
 
-function %atomic_ustore_i32_i64(i64, i64) {
-block0(v0: i64, v1: i64):
-    atomic_store32.i64 v0, v1
+function %atomic_store_i16(i16, i64) {
+block0(v0: i16, v1: i64):
+    atomic_store.i16 v0, v1
     return
 }
 
+; check: stlrh w0, [x1]
+; nextln: ldp fp, lr, [sp], #16
+; nextln: ret
+
+function %atomic_store_i8(i8, i64) {
+block0(v0: i8, v1: i64):
+    atomic_store.i8 v0, v1
+    return
+}
+
+; check: stlrb w0, [x1]
+; nextln: ldp fp, lr, [sp], #16
+; nextln: ret
+
+function %atomic_store_i64_i32(i64, i64) {
+block0(v0: i64, v1: i64):
+    v2 = ireduce.i32 v0
+    atomic_store.i32 v2, v1
+    return
+}
+
+; check-not: uxt
 ; check: stlr w0, [x1]
 ; nextln: ldp fp, lr, [sp], #16
 ; nextln: ret
 
-function %atomic_ustore_i16_i32(i32, i64) {
-block0(v0: i32, v1: i64):
-    atomic_store16.i32 v0, v1
+function %atomic_store_i64_i16(i64, i64) {
+block0(v0: i64, v1: i64):
+    v2 = ireduce.i16 v0
+    atomic_store.i16 v2, v1
     return
 }
 
+; check-not: uxt
 ; check: stlrh w0, [x1]
 ; nextln: ldp fp, lr, [sp], #16
 ; nextln: ret
 
-function %atomic_ustore_i16_i64(i64, i64) {
+function %atomic_store_i64_i8(i64, i64) {
 block0(v0: i64, v1: i64):
-    atomic_store16.i64 v0, v1
+    v2 = ireduce.i8 v0
+    atomic_store.i8 v2, v1
     return
 }
 
+; check-not: uxt
+; check: stlrb w0, [x1]
+; nextln: ldp fp, lr, [sp], #16
+; nextln: ret
+
+function %atomic_store_i32_i16(i32, i64) {
+block0(v0: i32, v1: i64):
+    v2 = ireduce.i16 v0
+    atomic_store.i16 v2, v1
+    return
+}
+
+; check-not: uxt
 ; check: stlrh w0, [x1]
 ; nextln: ldp fp, lr, [sp], #16
 ; nextln: ret
 
-function %atomic_ustore_i8_i32(i32, i64) {
+function %atomic_store_i32_i8(i32, i64) {
 block0(v0: i32, v1: i64):
-    atomic_store8.i32 v0, v1
-    return
-}
-
-; check: stlrb w0, [x1]
-; nextln: ldp fp, lr, [sp], #16
-; nextln: ret
-
-function %atomic_ustore_i8_i64(i64, i64) {
-block0(v0: i64, v1: i64):
-    atomic_store8.i64 v0, v1
+    v2 = ireduce.i8 v0
+    atomic_store.i8 v2, v1
     return
 }
 
+; check-not: uxt
 ; check: stlrb w0, [x1]
 ; nextln: ldp fp, lr, [sp], #16
 ; nextln: ret
diff --git a/cranelift/filetests/filetests/isa/s390x/atomic_load-little.clif b/cranelift/filetests/filetests/isa/s390x/atomic_load-little.clif
index 629c432370..5556176bbb 100644
--- a/cranelift/filetests/filetests/isa/s390x/atomic_load-little.clif
+++ b/cranelift/filetests/filetests/isa/s390x/atomic_load-little.clif
@@ -41,29 +41,29 @@ block0:
 ; check: larl %r1, %sym + 0 ; lrv %r2, 0(%r1)
 ; nextln: br %r14
 
-function %atomic_load_i16(i64) -> i32 {
+function %atomic_load_i16(i64) -> i16 {
 block0(v0: i64):
-    v1 = atomic_uload16.i32 little v0
+    v1 = atomic_load.i16 little v0
     return v1
 }
 
 ; check: lrvh %r2, 0(%r2)
 ; nextln: br %r14
 
-function %atomic_load_i16_sym() -> i32 {
+function %atomic_load_i16_sym() -> i16 {
 gv0 = symbol colocated %sym
 block0:
     v0 = symbol_value.i64 gv0
-    v1 = atomic_uload16.i32 little v0
+    v1 = atomic_load.i16 little v0
     return v1
 }
 
 ; check: larl %r1, %sym + 0 ; lrvh %r2, 0(%r1)
 ; nextln: br %r14
 
-function %atomic_load_i8(i64) -> i32 {
+function %atomic_load_i8(i64) -> i8 {
 block0(v0: i64):
-    v1 = atomic_uload8.i32 little v0
+    v1 = atomic_load.i8 little v0
     return v1
 }
diff --git a/cranelift/filetests/filetests/isa/s390x/atomic_load.clif b/cranelift/filetests/filetests/isa/s390x/atomic_load.clif
index 9a58de52d1..b361aaa4c7 100644
--- a/cranelift/filetests/filetests/isa/s390x/atomic_load.clif
+++ b/cranelift/filetests/filetests/isa/s390x/atomic_load.clif
@@ -41,29 +41,29 @@ block0:
 ; check: lrl %r2, %sym + 0
 ; nextln: br %r14
 
-function %atomic_load_i16(i64) -> i32 {
+function %atomic_load_i16(i64) -> i16 {
 block0(v0: i64):
-    v1 = atomic_uload16.i32 v0
+    v1 = atomic_load.i16 v0
     return v1
 }
 
 ; check: llh %r2, 0(%r2)
 ; nextln: br %r14
 
-function %atomic_load_i16_sym() -> i32 {
+function %atomic_load_i16_sym() -> i16 {
 gv0 = symbol colocated %sym
 block0:
     v0 = symbol_value.i64 gv0
-    v1 = atomic_uload16.i32 v0
+    v1 = atomic_load.i16 v0
     return v1
 }
 
 ; check: llhrl %r2, %sym + 0
 ; nextln: br %r14
 
-function %atomic_load_i8(i64) -> i32 {
+function %atomic_load_i8(i64) -> i8 {
 block0(v0: i64):
-    v1 = atomic_uload8.i32 v0
+    v1 = atomic_load.i8 v0
     return v1
 }
diff --git a/cranelift/interpreter/src/step.rs b/cranelift/interpreter/src/step.rs
index 2b892de57c..c9c0372980 100644
--- a/cranelift/interpreter/src/step.rs
+++ b/cranelift/interpreter/src/step.rs
@@ -625,14 +625,8 @@ where
         Opcode::Iconcat => assign(Value::concat(arg(0)?, arg(1)?)?),
         Opcode::AtomicRmw => unimplemented!("AtomicRmw"),
         Opcode::AtomicCas => unimplemented!("AtomicCas"),
-        Opcode::AtomicLoad
-        | Opcode::AtomicUload8
-        | Opcode::AtomicUload16
-        | Opcode::AtomicUload32 => unimplemented!("AtomicLoad"),
-        Opcode::AtomicStore
-        | Opcode::AtomicStore8
-        | Opcode::AtomicStore16
-        | Opcode::AtomicStore32 => unimplemented!("AtomicStore"),
+        Opcode::AtomicLoad => unimplemented!("AtomicLoad"),
+        Opcode::AtomicStore => unimplemented!("AtomicStore"),
         Opcode::Fence => unimplemented!("Fence"),
         Opcode::WideningPairwiseDotProductS => unimplemented!("WideningPairwiseDotProductS"),
         Opcode::SqmulRoundSat => unimplemented!("SqmulRoundSat"),