diff --git a/cranelift/codegen/meta/src/gen_inst.rs b/cranelift/codegen/meta/src/gen_inst.rs
index 01ccefb7e0..4882969990 100644
--- a/cranelift/codegen/meta/src/gen_inst.rs
+++ b/cranelift/codegen/meta/src/gen_inst.rs
@@ -66,7 +66,7 @@ fn gen_formats(formats: &[&InstructionFormat], fmt: &mut Formatter) {
 /// 16 bytes on 64-bit architectures. If more space is needed to represent an instruction, use a
 /// `ValueList` to store the additional information out of line.
 fn gen_instruction_data(formats: &[&InstructionFormat], fmt: &mut Formatter) {
-    fmt.line("#[derive(Copy, Clone, Debug, PartialEq, Hash)]");
+    fmt.line("#[derive(Copy, Clone, Debug, PartialEq, Eq, Hash)]");
     fmt.line(r#"#[cfg_attr(feature = "enable-serde", derive(Serialize, Deserialize))]"#);
     fmt.line("#[allow(missing_docs)]");
     fmtln!(fmt, "pub enum InstructionData {");
diff --git a/cranelift/codegen/src/egraph.rs b/cranelift/codegen/src/egraph.rs
index 2cbe412e34..e12b6fed39 100644
--- a/cranelift/codegen/src/egraph.rs
+++ b/cranelift/codegen/src/egraph.rs
@@ -7,13 +7,14 @@ use crate::dominator_tree::DominatorTree;
 use crate::egraph::domtree::DomTreeWithChildren;
 use crate::egraph::elaborate::Elaborator;
 use crate::fx::FxHashSet;
-use crate::inst_predicates::is_pure_for_egraph;
+use crate::inst_predicates::{is_mergeable_for_egraph, is_pure_for_egraph};
 use crate::ir::{
-    DataFlowGraph, Function, Inst, InstructionData, Type, Value, ValueDef, ValueListPool,
+    Block, DataFlowGraph, Function, Inst, InstructionData, Type, Value, ValueDef, ValueListPool,
 };
 use crate::loop_analysis::LoopAnalysis;
 use crate::opts::generated_code::ContextIter;
 use crate::opts::IsleContext;
+use crate::scoped_hash_map::{Entry as ScopedEntry, ScopedHashMap};
 use crate::trace;
 use crate::unionfind::UnionFind;
 use cranelift_entity::packed_option::ReservedValue;
@@ -77,6 +78,7 @@ where
     pub(crate) func: &'opt mut Function,
     pub(crate) value_to_opt_value: &'opt mut SecondaryMap<Value, Value>,
     pub(crate) gvn_map: &'opt mut CtxHashMap<(Type, InstructionData), Value>,
+    pub(crate) effectful_gvn_map: &'opt mut ScopedHashMap<(Type, InstructionData), Value>,
     pub(crate) eclasses: &'opt mut UnionFind<Value>,
     pub(crate) remat_values: &'opt mut FxHashSet<Value>,
     pub(crate) stats: &'opt mut Stats,
@@ -285,10 +287,49 @@
     fn optimize_skeleton_inst(&mut self, inst: Inst) -> bool {
         self.stats.skeleton_inst += 1;

-        // If a load or store, process it with the alias analysis to see
-        // if we can optimize it (rewrite in terms of an earlier load or
-        // stored value).
-        if let Some(new_result) =
+        // First, can we try to deduplicate? We need to keep some copy
+        // of the instruction around because it's side-effecting, but
+        // we may be able to reuse an earlier instance of it.
+        if is_mergeable_for_egraph(self.func, inst) {
+            let result = self.func.dfg.inst_results(inst)[0];
+            trace!(" -> mergeable side-effecting op {}", inst);
+
+            // Does this instruction already exist? If so, add entries to
+            // the value-map to rewrite uses of its results to the results
+            // of the original (existing) instruction. If not, optimize
+            // the new instruction.
+            //
+            // Note that we use the "effectful GVN map", which is
+            // scoped: because effectful ops are not removed from the
+            // skeleton (`Layout`), we need to be mindful of whether
+            // our current position is dominated by an instance of the
+            // instruction. (See #5796 for details.)
+            let ty = self.func.dfg.ctrl_typevar(inst);
+            match self
+                .effectful_gvn_map
+                .entry((ty, self.func.dfg.insts[inst].clone()))
+            {
+                ScopedEntry::Occupied(o) => {
+                    let orig_result = *o.get();
+                    // Hit in GVN map -- reuse value.
+                    self.value_to_opt_value[result] = orig_result;
+                    self.eclasses.union(orig_result, result);
+                    trace!(" -> merges result {} to {}", result, orig_result);
+                    true
+                }
+                ScopedEntry::Vacant(v) => {
+                    // Otherwise, insert it into the value-map.
+                    self.value_to_opt_value[result] = result;
+                    v.insert(result);
+                    trace!(" -> inserts as new (no GVN)");
+                    false
+                }
+            }
+        }
+        // Otherwise, if a load or store, process it with the alias
+        // analysis to see if we can optimize it (rewrite in terms of
+        // an earlier load or stored value).
+        else if let Some(new_result) =
             self.alias_analysis
                 .process_inst(self.func, self.alias_analysis_state, inst)
         {
@@ -382,82 +423,126 @@ impl<'a> EgraphPass<'a> {
         let mut cursor = FuncCursor::new(self.func);
         let mut value_to_opt_value: SecondaryMap<Value, Value> =
             SecondaryMap::with_default(Value::reserved_value());
+        // Map from instruction to value for hash-consing of pure ops
+        // into the egraph. This can be a standard (non-scoped)
+        // hashmap because pure ops have no location: they are
+        // "outside of" control flow.
+        //
+        // Note also that we keep the controlling typevar (the `Type`
+        // in the tuple below) because it may disambiguate
+        // instructions that are identical except for type.
         let mut gvn_map: CtxHashMap<(Type, InstructionData), Value> =
             CtxHashMap::with_capacity(cursor.func.dfg.num_values());
+        // Map from instruction to value for GVN'ing of effectful but
+        // idempotent ops, which remain in the side-effecting
+        // skeleton. This needs to be scoped because we cannot
+        // deduplicate one instruction to another that is in a
+        // non-dominating block.
+        //
+        // Note that we can use a ScopedHashMap here without the
+        // "context" (as needed by CtxHashMap) because in practice the
+        // ops we want to GVN have all their args inline. Equality on
+        // the InstructionData itself is conservative: two insts whose
+        // struct contents compare shallowly equal are definitely
+        // identical, but identical insts in a deep-equality sense may
+        // not compare shallowly equal, due to list indirection. This
+        // is fine for GVN, because it is still sound to skip any
+        // given GVN opportunity (and keep the original instructions).
+        //
+        // As above, we keep the controlling typevar here as part of
+        // the key: effectful instructions may (as for pure
+        // instructions) be differentiated only on the type.
+        let mut effectful_gvn_map: ScopedHashMap<(Type, InstructionData), Value> =
+            ScopedHashMap::new();

         // In domtree preorder, visit blocks. (TODO: factor out an
         // iterator from this and elaborator.)
         let root = self.domtree_children.root();
-        let mut block_stack = vec![root];
-        while let Some(block) = block_stack.pop() {
-            // We popped this block; push children
-            // immediately, then process this block.
-            block_stack.extend(self.domtree_children.children(block));
+        enum StackEntry {
+            Visit(Block),
+            Pop,
+        }
+        let mut block_stack = vec![StackEntry::Visit(root)];
+        while let Some(entry) = block_stack.pop() {
+            match entry {
+                StackEntry::Visit(block) => {
+                    // We popped this block; push children
+                    // immediately, then process this block.
+                    block_stack.push(StackEntry::Pop);
+                    block_stack
+                        .extend(self.domtree_children.children(block).map(StackEntry::Visit));
+                    effectful_gvn_map.increment_depth();

-            trace!("Processing block {}", block);
-            cursor.set_position(CursorPosition::Before(block));
+                    trace!("Processing block {}", block);
+                    cursor.set_position(CursorPosition::Before(block));

-            let mut alias_analysis_state = self.alias_analysis.block_starting_state(block);
+                    let mut alias_analysis_state = self.alias_analysis.block_starting_state(block);

-            for &param in cursor.func.dfg.block_params(block) {
-                trace!("creating initial singleton eclass for blockparam {}", param);
-                self.eclasses.add(param);
-                value_to_opt_value[param] = param;
-            }
-            while let Some(inst) = cursor.next_inst() {
-                trace!("Processing inst {}", inst);
-
-                // While we're passing over all insts, create initial
-                // singleton eclasses for all result and blockparam
-                // values. Also do initial analysis of all inst
-                // results.
-                for &result in cursor.func.dfg.inst_results(inst) {
-                    trace!("creating initial singleton eclass for {}", result);
-                    self.eclasses.add(result);
-                }
-
-                // Rewrite args of *all* instructions using the
-                // value-to-opt-value map.
-                cursor.func.dfg.resolve_aliases_in_arguments(inst);
-                cursor.func.dfg.map_inst_values(inst, |_, arg| {
-                    let new_value = value_to_opt_value[arg];
-                    trace!("rewriting arg {} of inst {} to {}", arg, inst, new_value);
-                    debug_assert_ne!(new_value, Value::reserved_value());
-                    new_value
-                });
-
-                // Build a context for optimization, with borrows of
-                // state. We can't invoke a method on `self` because
-                // we've borrowed `self.func` mutably (as
-                // `cursor.func`) so we pull apart the pieces instead
-                // here.
-                let mut ctx = OptimizeCtx {
-                    func: cursor.func,
-                    value_to_opt_value: &mut value_to_opt_value,
-                    gvn_map: &mut gvn_map,
-                    eclasses: &mut self.eclasses,
-                    rewrite_depth: 0,
-                    subsume_values: FxHashSet::default(),
-                    remat_values: &mut self.remat_values,
-                    stats: &mut self.stats,
-                    alias_analysis: self.alias_analysis,
-                    alias_analysis_state: &mut alias_analysis_state,
-                };
-
-                if is_pure_for_egraph(ctx.func, inst) {
-                    // Insert into GVN map and optimize any new nodes
-                    // inserted (recursively performing this work for
-                    // any nodes the optimization rules produce).
-                    let inst = NewOrExistingInst::Existing(inst);
-                    ctx.insert_pure_enode(inst);
-                    // We've now rewritten all uses, or will when we
-                    // see them, and the instruction exists as a pure
-                    // enode in the eclass, so we can remove it.
-                    cursor.remove_inst_and_step_back();
-                } else {
-                    if ctx.optimize_skeleton_inst(inst) {
-                        cursor.remove_inst_and_step_back();
+                    for &param in cursor.func.dfg.block_params(block) {
+                        trace!("creating initial singleton eclass for blockparam {}", param);
+                        self.eclasses.add(param);
+                        value_to_opt_value[param] = param;
                     }
+                    while let Some(inst) = cursor.next_inst() {
+                        trace!("Processing inst {}", inst);
+
+                        // While we're passing over all insts, create initial
+                        // singleton eclasses for all result and blockparam
+                        // values. Also do initial analysis of all inst
+                        // results.
+                        for &result in cursor.func.dfg.inst_results(inst) {
+                            trace!("creating initial singleton eclass for {}", result);
+                            self.eclasses.add(result);
+                        }
+
+                        // Rewrite args of *all* instructions using the
+                        // value-to-opt-value map.
+                        cursor.func.dfg.resolve_aliases_in_arguments(inst);
+                        cursor.func.dfg.map_inst_values(inst, |_, arg| {
+                            let new_value = value_to_opt_value[arg];
+                            trace!("rewriting arg {} of inst {} to {}", arg, inst, new_value);
+                            debug_assert_ne!(new_value, Value::reserved_value());
+                            new_value
+                        });
+
+                        // Build a context for optimization, with borrows of
+                        // state. We can't invoke a method on `self` because
+                        // we've borrowed `self.func` mutably (as
+                        // `cursor.func`) so we pull apart the pieces instead
+                        // here.
+                        let mut ctx = OptimizeCtx {
+                            func: cursor.func,
+                            value_to_opt_value: &mut value_to_opt_value,
+                            gvn_map: &mut gvn_map,
+                            effectful_gvn_map: &mut effectful_gvn_map,
+                            eclasses: &mut self.eclasses,
+                            rewrite_depth: 0,
+                            subsume_values: FxHashSet::default(),
+                            remat_values: &mut self.remat_values,
+                            stats: &mut self.stats,
+                            alias_analysis: self.alias_analysis,
+                            alias_analysis_state: &mut alias_analysis_state,
+                        };
+
+                        if is_pure_for_egraph(ctx.func, inst) {
+                            // Insert into GVN map and optimize any new nodes
+                            // inserted (recursively performing this work for
+                            // any nodes the optimization rules produce).
+                            let inst = NewOrExistingInst::Existing(inst);
+                            ctx.insert_pure_enode(inst);
+                            // We've now rewritten all uses, or will when we
+                            // see them, and the instruction exists as a pure
+                            // enode in the eclass, so we can remove it.
+                            cursor.remove_inst_and_step_back();
+                        } else {
+                            if ctx.optimize_skeleton_inst(inst) {
+                                cursor.remove_inst_and_step_back();
+                            }
+                        }
+                    }
+                }
+                StackEntry::Pop => {
+                    effectful_gvn_map.decrement_depth();
                 }
             }
         }
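
The scoped map plus explicit `Visit`/`Pop` stack is the heart of the change: an entry inserted while visiting a block stays visible exactly for the blocks that block dominates, and disappears when the subtree's `Pop` entry is reached. Below is a minimal standalone sketch of the pattern with a simplified scoped map; Cranelift's real `ScopedHashMap` additionally restores keys shadowed across scopes, which this toy version omits (that simplification matches how the GVN map is used, since a key already present is reused rather than re-inserted):

use std::collections::HashMap;
use std::hash::Hash;

// Toy scoped map: `pop_scope` undoes every insertion made since the
// matching `push_scope`.
struct ScopedMap<K, V> {
    map: HashMap<K, V>,
    scopes: Vec<Vec<K>>, // keys inserted in each open scope
}

impl<K: Hash + Eq + Clone, V> ScopedMap<K, V> {
    fn new() -> Self {
        Self { map: HashMap::new(), scopes: Vec::new() }
    }
    fn push_scope(&mut self) {
        self.scopes.push(Vec::new());
    }
    fn pop_scope(&mut self) {
        for key in self.scopes.pop().expect("unbalanced pop_scope") {
            self.map.remove(&key);
        }
    }
    fn get(&self, key: &K) -> Option<&V> {
        self.map.get(key)
    }
    fn insert(&mut self, key: K, value: V) {
        self.scopes.last_mut().expect("no open scope").push(key.clone());
        self.map.insert(key, value);
    }
}

// Domtree preorder with an explicit stack. The `Pop` entry is pushed
// *under* a block's children, so the block's scope closes only after
// its whole dominated subtree is visited: any hit in the map is
// therefore an instruction at a dominating position.
enum StackEntry {
    Visit(usize), // toy block id
    Pop,
}

fn walk(root: usize, children: &[Vec<usize>]) {
    let mut gvn: ScopedMap<String, usize> = ScopedMap::new();
    let mut stack = vec![StackEntry::Visit(root)];
    while let Some(entry) = stack.pop() {
        match entry {
            StackEntry::Visit(block) => {
                stack.push(StackEntry::Pop);
                stack.extend(children[block].iter().copied().map(StackEntry::Visit));
                gvn.push_scope();
                // Stand-in for processing the block's instructions: a
                // hit in `gvn` would be reused; a miss is inserted and
                // stays visible while this subtree is open.
                let key = format!("inst-in-block-{block}");
                if gvn.get(&key).is_none() {
                    gvn.insert(key, block);
                }
            }
            StackEntry::Pop => gvn.pop_scope(),
        }
    }
}

fn main() {
    // Block 0 dominates blocks 1 and 2; 1 and 2 are siblings, so
    // nothing inserted in 1's scope is visible while visiting 2.
    walk(0, &[vec![1, 2], vec![], vec![]]);
}
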
diff --git a/cranelift/codegen/src/inst_predicates.rs b/cranelift/codegen/src/inst_predicates.rs
index 9b284e8b8c..f82f41daac 100644
--- a/cranelift/codegen/src/inst_predicates.rs
+++ b/cranelift/codegen/src/inst_predicates.rs
@@ -73,6 +73,25 @@ pub fn is_pure_for_egraph(func: &Function, inst: Inst) -> bool {
     has_one_result && (is_readonly_load || (!op.can_load() && !trivially_has_side_effects(op)))
 }

+/// Can the given instruction be merged into another copy of itself?
+/// These instructions may have side-effects, but as long as we retain
+/// the first instance of the instruction, the second and further
+/// instances are redundant if they would produce the same trap or
+/// result.
+pub fn is_mergeable_for_egraph(func: &Function, inst: Inst) -> bool {
+    let op = func.dfg.insts[inst].opcode();
+    // We can only merge one-result operators due to the way that GVN
+    // is structured in the egraph implementation.
+    let has_one_result = func.dfg.inst_results(inst).len() == 1;
+    has_one_result
+        // Loads/stores are handled by alias analysis and not
+        // otherwise mergeable.
+        && !op.can_load()
+        && !op.can_store()
+        // Can only have idempotent side-effects.
+        && (!has_side_effect(func, inst) || op.side_effects_idempotent())
+}
+
 /// Does the given instruction have any side-effect as per [has_side_effect], or else is a load,
 /// but not the get_pinned_reg opcode?
 pub fn has_lowering_side_effect(func: &Function, inst: Inst) -> bool {
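
The predicate is deliberately narrow. Restated as a toy rule over hypothetical opcode properties (plain fields standing in for Cranelift's real `Opcode` methods such as `can_load()` and `side_effects_idempotent()`):

// Toy restatement of the mergeability rule above.
#[derive(Clone, Copy)]
struct OpProps {
    results: usize,           // number of SSA results
    loads: bool,              // may read memory
    stores: bool,             // may write memory
    side_effecting: bool,     // has any side effect (e.g. can trap)
    idempotent_effects: bool, // re-executing repeats the same effect
}

fn mergeable(op: OpProps) -> bool {
    // One result, no memory access, and effects (if any) that are a
    // pure function of the operands -- so a dominated duplicate must
    // trap or produce a value exactly as the first copy did.
    op.results == 1
        && !op.loads
        && !op.stores
        && (!op.side_effecting || op.idempotent_effects)
}

fn main() {
    // `udiv`: traps on a zero divisor, but deterministically.
    let udiv = OpProps {
        results: 1,
        loads: false,
        stores: false,
        side_effecting: true,
        idempotent_effects: true,
    };
    // A call may do anything; running it twice is observable.
    let call = OpProps {
        results: 1,
        loads: true,
        stores: true,
        side_effecting: true,
        idempotent_effects: false,
    };
    assert!(mergeable(udiv));
    assert!(!mergeable(call));
}

Division is the motivating case: `udiv` can trap, but the trap is a deterministic function of its operands, so a second copy at a dominated position can only behave identically to the first.
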
diff --git a/cranelift/codegen/src/ir/instructions.rs b/cranelift/codegen/src/ir/instructions.rs
index 97e0ac0b5a..f28cc45d2e 100644
--- a/cranelift/codegen/src/ir/instructions.rs
+++ b/cranelift/codegen/src/ir/instructions.rs
@@ -45,7 +45,7 @@ pub type ValueListPool = entity::ListPool<Value>;
 /// The BlockCall::new function guarantees this layout by requiring a block argument that's written
 /// in as the first element of the EntityList. Any subsequent entries are always assumed to be real
 /// Values.
-#[derive(Debug, Clone, Copy, PartialEq, Hash)]
+#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
 #[cfg_attr(feature = "enable-serde", derive(Serialize, Deserialize))]
 pub struct BlockCall {
     /// The underlying storage for the BlockCall. The first element of the values EntityList is
diff --git a/cranelift/entity/src/list.rs b/cranelift/entity/src/list.rs
index 659b94a9bc..682eb256fa 100644
--- a/cranelift/entity/src/list.rs
+++ b/cranelift/entity/src/list.rs
@@ -62,7 +62,7 @@ use serde::{Deserialize, Serialize};
 ///
 /// The index stored in an `EntityList` points to part 2, the list elements. The value 0 is
 /// reserved for the empty list which isn't allocated in the vector.
-#[derive(Clone, Copy, Debug, PartialEq, Hash)]
+#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)]
 #[cfg_attr(feature = "enable-serde", derive(Serialize, Deserialize))]
 pub struct EntityList<T: EntityRef + ReservedValue> {
     index: u32,
diff --git a/cranelift/filetests/filetests/egraph/issue-5796.clif b/cranelift/filetests/filetests/egraph/issue-5796.clif
new file mode 100644
index 0000000000..66963f5c4e
--- /dev/null
+++ b/cranelift/filetests/filetests/egraph/issue-5796.clif
@@ -0,0 +1,76 @@
+test optimize
+set opt_level=speed
+target x86_64
+
+function %f0(i32, i32, i32) -> i32 {
+block0(v0: i32, v1: i32, v2: i32):
+    brif v0, block1, block2
+
+block1:
+    v3 = udiv v1, v2
+    return v3
+
+block2:
+    v4 = udiv v1, v2
+    brif v1, block3, block4
+
+block3:
+    return v4
+
+block4:
+    return v4
+}
+
+; check: block0(v0: i32, v1: i32, v2: i32):
+; check: brif v0, block1, block2
+
+; check: block1:
+; check: v3 = udiv.i32 v1, v2
+; check: return v3
+
+; check: block2:
+; check: v4 = udiv.i32 v1, v2
+; check: brif.i32 v1, block3, block4
+
+; check: block3:
+; check: return v4
+
+; check: block4:
+; check: return v4
+
+function %f1(i32, i32, i32) -> i32 {
+block0(v0: i32, v1: i32, v2: i32):
+    brif v0, block1, block2
+
+block1:
+    v3 = udiv v1, v2
+    return v3
+
+block2:
+    v4 = udiv v1, v2
+    brif v1, block3, block4
+
+block3:
+    v5 = udiv v1, v2
+    return v5
+
+block4:
+    return v4
+}
+
+; check: block0(v0: i32, v1: i32, v2: i32):
+; check: brif v0, block1, block2
+
+; check: block1:
+; check: v3 = udiv.i32 v1, v2
+; check: return v3
+
+; check: block2:
+; check: v4 = udiv.i32 v1, v2
+; check: brif.i32 v1, block3, block4
+
+; check: block3:
+; check: return v4
+
+; check: block4:
+; check: return v4
diff --git a/cranelift/filetests/filetests/wasm/duplicate-loads-dynamic-memory-egraph.wat b/cranelift/filetests/filetests/wasm/duplicate-loads-dynamic-memory-egraph.wat
new file mode 100644
index 0000000000..b6d4e88dcf
--- /dev/null
+++ b/cranelift/filetests/filetests/wasm/duplicate-loads-dynamic-memory-egraph.wat
@@ -0,0 +1,92 @@
+;;! target = "x86_64"
+;;!
+;;! optimize = true
+;;!
+;;! settings = [
+;;!   "enable_heap_access_spectre_mitigation=true",
+;;!   "opt_level=speed_and_size",
+;;!   "use_egraphs=true"
+;;! ]
+;;!
+;;! [globals.vmctx]
+;;! type = "i64"
+;;! vmctx = true
+;;!
+;;! [globals.heap_base]
+;;! type = "i64"
+;;! load = { base = "vmctx", offset = 0 }
+;;!
+;;! [globals.heap_bound]
+;;! type = "i64"
+;;! load = { base = "vmctx", offset = 8 }
+;;!
+;;! [[heaps]]
+;;! base = "heap_base"
+;;! min_size = 0
+;;! offset_guard_size = 0xffffffff
+;;! index_type = "i32"
+;;! style = { kind = "dynamic", bound = "heap_bound" }
+
+(module
+  (memory (export "memory") 0)
+  (func (export "load-without-offset") (param i32) (result i32 i32)
+    local.get 0
+    i32.load
+    local.get 0
+    i32.load
+  )
+  (func (export "load-with-offset") (param i32) (result i32 i32)
+    local.get 0
+    i32.load offset=1234
+    local.get 0
+    i32.load offset=1234
+  )
+)
+
+;; function u0:0(i32, i64 vmctx) -> i32, i32 fast {
+;;     gv0 = vmctx
+;;     gv1 = load.i64 notrap aligned gv0+8
+;;     gv2 = load.i64 notrap aligned gv0
+;;
+;; block0(v0: i32, v1: i64):
+;; @0057    v4 = uextend.i64 v0
+;; @0057    v5 = iconst.i64 4
+;; @0057    v6 = uadd_overflow_trap v4, v5, heap_oob  ; v5 = 4
+;; @0057    v7 = load.i64 notrap aligned v1+8
+;; @0057    v8 = load.i64 notrap aligned v1
+;; @0057    v11 = icmp ugt v6, v7
+;; @0057    v10 = iconst.i64 0
+;; @0057    v9 = iadd v8, v4
+;; @0057    v12 = select_spectre_guard v11, v10, v9  ; v10 = 0
+;; @0057    v13 = load.i32 little heap v12
+;;          v2 -> v13
+;; @005f    jump block1
+;;
+;; block1:
+;; @005f    return v13, v13
+;; }
+;;
+;; function u0:1(i32, i64 vmctx) -> i32, i32 fast {
+;;     gv0 = vmctx
+;;     gv1 = load.i64 notrap aligned gv0+8
+;;     gv2 = load.i64 notrap aligned gv0
+;;
+;; block0(v0: i32, v1: i64):
+;; @0064    v4 = uextend.i64 v0
+;; @0064    v5 = iconst.i64 1238
+;; @0064    v6 = uadd_overflow_trap v4, v5, heap_oob  ; v5 = 1238
+;; @0064    v7 = load.i64 notrap aligned v1+8
+;; @0064    v8 = load.i64 notrap aligned v1
+;; @0064    v12 = icmp ugt v6, v7
+;; @0064    v11 = iconst.i64 0
+;; @0064    v9 = iadd v8, v4
+;;          v26 = iconst.i64 1234
+;; @0064    v10 = iadd v9, v26  ; v26 = 1234
+;; @0064    v13 = select_spectre_guard v12, v11, v10  ; v11 = 0
+;; @0064    v14 = load.i32 little heap v13
+;;          v2 -> v14
+;; @006e    jump block1
+;;
+;; block1:
+;; @006e    return v14, v14
+;; }
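
A final note on the `Eq` derives threaded through the diff: `InstructionData`, `BlockCall`, and `EntityList` become hash-map keys in the GVN maps, and that requires full `Eq`, not just `PartialEq`. The comment in `egraph.rs` about shallow equality can be illustrated with a toy model (hypothetical types, not Cranelift's): instruction data whose arguments live behind a list handle can be deep-equal without comparing shallowly equal, and the scoped GVN simply misses such merges, which is sound.

// Toy model of why shallow equality on instruction data is
// conservative but sound for GVN.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
struct ListHandle(u32); // index into an external pool, like `EntityList`

#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
enum ToyInstData {
    // Arguments stored inline: shallow equality is deep equality.
    Binary { opcode: u8, args: [u32; 2] },
    // Arguments behind a handle: `==` compares the handle, not the
    // pooled contents it points at.
    Call { func: u32, args: ListHandle },
}

fn main() {
    // Two identical argument lists living at different pool slots.
    let pool: Vec<Vec<u32>> = vec![vec![1, 2], vec![1, 2]];
    assert_eq!(pool[0], pool[1]);

    // Deep-equal calls that compare shallowly unequal: a GVN map
    // keyed on `ToyInstData` keeps both copies. That only forgoes an
    // optimization; it never merges two genuinely different insts.
    let a = ToyInstData::Call { func: 7, args: ListHandle(0) };
    let b = ToyInstData::Call { func: 7, args: ListHandle(1) };
    assert_ne!(a, b);

    // Ops with inline args, like the `udiv`s in the tests above,
    // compare equal exactly when they are the same operation.
    let c = ToyInstData::Binary { opcode: 1, args: [1, 2] };
    let d = ToyInstData::Binary { opcode: 1, args: [1, 2] };
    assert_eq!(c, d);
    println!("{a:?} != {b:?}; {c:?} == {d:?}");
}
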