/// Bit positions of the individual flags inside `MemFlags::bits`.
enum FlagBit {
    Notrap,
    Aligned,
    Readonly,
    LittleEndian,
    BigEndian,
}

/// Textual flag names, indexed by `FlagBit` discriminant (used by `set_by_name`).
const NAMES: [&str; 5] = ["notrap", "aligned", "readonly", "little", "big"];

/// Endianness of a memory access.
#[derive(Clone, Copy, PartialEq, Eq, Debug, Hash)]
pub enum Endianness {
    /// Little-endian
    Little,
    /// Big-endian
    Big,
}

/// Flags for memory operations like load/store.
///
/// Each flag introduces a limited form of undefined behavior that enables
/// certain optimizations. Removing a flag never changes the semantics of a
/// program; adding one may.
///
/// The flags also determine the endianness of the access: by default the
/// native endianness of the target ISA is used, but an individual access can
/// explicitly request little- or big-endian semantics instead.
#[derive(Clone, Copy, Debug, Hash, PartialEq, Eq)]
pub struct MemFlags {
    bits: u8,
}

impl MemFlags {
    // NOTE(review): `read`/`set` mirror the private accessors defined
    // elsewhere in the original file; the bit layout is the one `set_by_name`
    // itself uses (`1 << FlagBit as usize`).

    /// Test whether the given flag bit is set.
    fn read(self, bit: FlagBit) -> bool {
        self.bits & (1 << bit as usize) != 0
    }

    /// Set the given flag bit.
    fn set(&mut self, bit: FlagBit) {
        self.bits |= 1 << bit as usize
    }

    /// Set a flag bit by name.
    ///
    /// Returns true if the flag was found and set, false for an unknown flag
    /// name. Also returns false (leaving the flags unchanged) when the
    /// requested bit would combine `little` and `big` endianness.
    pub fn set_by_name(&mut self, name: &str) -> bool {
        let bit = match NAMES.iter().position(|&s| s == name) {
            Some(bit) => bit,
            None => return false,
        };
        let bits = self.bits | 1 << bit;
        let little = 1 << FlagBit::LittleEndian as usize;
        let big = 1 << FlagBit::BigEndian as usize;
        // Reject an inconsistent combination of endianness flags.
        if bits & little != 0 && bits & big != 0 {
            return false;
        }
        self.bits = bits;
        true
    }

    /// Return the endianness of the memory access.
    ///
    /// If the flags explicitly specify an endianness, that one is returned;
    /// otherwise the access defaults to `native_endianness`. The caller must
    /// supply the native endianness because it is not encoded in CLIF IR —
    /// this lets a front end build IR without knowing the target endianness.
    pub fn endianness(self, native_endianness: Endianness) -> Endianness {
        if self.read(FlagBit::LittleEndian) {
            return Endianness::Little;
        }
        if self.read(FlagBit::BigEndian) {
            return Endianness::Big;
        }
        native_endianness
    }

    /// Set the endianness of the memory access.
    ///
    /// Panics if the opposite endianness flag was already set, since an
    /// access cannot be both little- and big-endian.
    pub fn set_endianness(&mut self, endianness: Endianness) {
        let bit = match endianness {
            Endianness::Little => FlagBit::LittleEndian,
            Endianness::Big => FlagBit::BigEndian,
        };
        self.set(bit);
        assert!(!(self.read(FlagBit::LittleEndian) && self.read(FlagBit::BigEndian)));
    }
}
If the platform diff --git a/cranelift/codegen/src/ir/mod.rs b/cranelift/codegen/src/ir/mod.rs index 4dbe90df34..c5e827db3d 100644 --- a/cranelift/codegen/src/ir/mod.rs +++ b/cranelift/codegen/src/ir/mod.rs @@ -50,7 +50,7 @@ pub use crate::ir::instructions::{ pub use crate::ir::jumptable::JumpTableData; pub use crate::ir::layout::Layout; pub use crate::ir::libcall::{get_probestack_funcref, LibCall}; -pub use crate::ir::memflags::MemFlags; +pub use crate::ir::memflags::{Endianness, MemFlags}; pub use crate::ir::progpoint::{ExpandedProgramPoint, ProgramOrder, ProgramPoint}; pub use crate::ir::sourceloc::SourceLoc; pub use crate::ir::stackslot::{StackLayoutInfo, StackSlotData, StackSlotKind, StackSlots}; diff --git a/cranelift/codegen/src/isa/mod.rs b/cranelift/codegen/src/isa/mod.rs index a1a4c3c397..1ce10a155e 100644 --- a/cranelift/codegen/src/isa/mod.rs +++ b/cranelift/codegen/src/isa/mod.rs @@ -235,6 +235,14 @@ pub trait TargetIsa: fmt::Display + Send + Sync { CallConv::triple_default(self.triple()) } + /// Get the endianness of this ISA. + fn endianness(&self) -> ir::Endianness { + match self.triple().endianness().unwrap() { + target_lexicon::Endianness::Little => ir::Endianness::Little, + target_lexicon::Endianness::Big => ir::Endianness::Big, + } + } + /// Get the pointer type of this ISA. 
fn pointer_type(&self) -> ir::Type { ir::Type::int(u16::from(self.pointer_bits())).unwrap() diff --git a/cranelift/codegen/src/legalizer/mod.rs b/cranelift/codegen/src/legalizer/mod.rs index 1900b144ce..149a65b639 100644 --- a/cranelift/codegen/src/legalizer/mod.rs +++ b/cranelift/codegen/src/legalizer/mod.rs @@ -659,7 +659,7 @@ fn narrow_load( inst: ir::Inst, func: &mut ir::Function, _cfg: &mut ControlFlowGraph, - _isa: &dyn TargetIsa, + isa: &dyn TargetIsa, ) { let mut pos = FuncCursor::new(func).at_inst(inst); pos.use_srcloc(inst); @@ -684,6 +684,10 @@ fn narrow_load( ptr, offset.try_add_i64(8).expect("load offset overflow"), ); + let (al, ah) = match flags.endianness(isa.endianness()) { + ir::Endianness::Little => (al, ah), + ir::Endianness::Big => (ah, al), + }; pos.func.dfg.replace(inst).iconcat(al, ah); } @@ -692,7 +696,7 @@ fn narrow_store( inst: ir::Inst, func: &mut ir::Function, _cfg: &mut ControlFlowGraph, - _isa: &dyn TargetIsa, + isa: &dyn TargetIsa, ) { let mut pos = FuncCursor::new(func).at_inst(inst); pos.use_srcloc(inst); @@ -708,6 +712,10 @@ fn narrow_store( }; let (al, ah) = pos.ins().isplit(val); + let (al, ah) = match flags.endianness(isa.endianness()) { + ir::Endianness::Little => (al, ah), + ir::Endianness::Big => (ah, al), + }; pos.ins().store(flags, al, ptr, offset); pos.ins().store( flags, diff --git a/cranelift/wasm/src/code_translator.rs b/cranelift/wasm/src/code_translator.rs index cf15787c18..2fe171842b 100644 --- a/cranelift/wasm/src/code_translator.rs +++ b/cranelift/wasm/src/code_translator.rs @@ -2056,7 +2056,9 @@ fn prepare_load( // Note that we don't set `is_aligned` here, even if the load instruction's // alignment immediate says it's aligned, because WebAssembly's immediate // field is just a hint, while Cranelift's aligned flag needs a guarantee. - let flags = MemFlags::new(); + // WebAssembly memory accesses are always little-endian. 
+ let mut flags = MemFlags::new(); + flags.set_endianness(ir::Endianness::Little); Ok((flags, base, offset.into())) } @@ -2103,7 +2105,8 @@ fn translate_store( builder, ); // See the comments in `prepare_load` about the flags. - let flags = MemFlags::new(); + let mut flags = MemFlags::new(); + flags.set_endianness(ir::Endianness::Little); builder .ins() .Store(opcode, val_ty, flags, offset.into(), val, base); @@ -2207,7 +2210,8 @@ fn translate_atomic_rmw( finalise_atomic_mem_addr(linear_mem_addr, memarg, access_ty, builder, state, environ)?; // See the comments in `prepare_load` about the flags. - let flags = MemFlags::new(); + let mut flags = MemFlags::new(); + flags.set_endianness(ir::Endianness::Little); let mut res = builder .ins() .atomic_rmw(access_ty, flags, op, final_effective_address, arg2); @@ -2260,7 +2264,8 @@ fn translate_atomic_cas( finalise_atomic_mem_addr(linear_mem_addr, memarg, access_ty, builder, state, environ)?; // See the comments in `prepare_load` about the flags. - let flags = MemFlags::new(); + let mut flags = MemFlags::new(); + flags.set_endianness(ir::Endianness::Little); let mut res = builder .ins() .atomic_cas(flags, final_effective_address, expected, replacement); @@ -2302,7 +2307,8 @@ fn translate_atomic_load( finalise_atomic_mem_addr(linear_mem_addr, memarg, access_ty, builder, state, environ)?; // See the comments in `prepare_load` about the flags. - let flags = MemFlags::new(); + let mut flags = MemFlags::new(); + flags.set_endianness(ir::Endianness::Little); let mut res = builder .ins() .atomic_load(access_ty, flags, final_effective_address); @@ -2348,7 +2354,8 @@ fn translate_atomic_store( finalise_atomic_mem_addr(linear_mem_addr, memarg, access_ty, builder, state, environ)?; // See the comments in `prepare_load` about the flags. - let flags = MemFlags::new(); + let mut flags = MemFlags::new(); + flags.set_endianness(ir::Endianness::Little); builder .ins() .atomic_store(flags, data, final_effective_address);