diff --git a/cranelift/codegen/src/egraph/elaborate.rs b/cranelift/codegen/src/egraph/elaborate.rs
index d52927ffea..dbb14505a7 100644
--- a/cranelift/codegen/src/egraph/elaborate.rs
+++ b/cranelift/codegen/src/egraph/elaborate.rs
@@ -401,6 +401,17 @@ impl<'a> Elaborator<'a> {
                     let arg_values = &self.elab_result_stack[arg_idx..];
 
                     // Compute max loop depth.
+                    //
+                    // Note that if there are no arguments then this instruction
+                    // is allowed to get hoisted up one loop. This is not
+                    // usually used since no-argument values are things like
+                    // constants which are typically rematerialized, but for the
+                    // `vconst` instruction 128-bit constants aren't as easily
+                    // rematerialized. They're hoisted out of the innermost loop
+                    // only, rather than all the way to the function entry, as
+                    // that may run the risk of placing too much register
+                    // pressure on the entire function. This is modeled with the
+                    // `.saturating_sub(1)` default used if there's no maximum.
                     let loop_hoist_level = arg_values
                         .iter()
                         .map(|&value| {
@@ -423,7 +434,7 @@ impl<'a> Elaborator<'a> {
                             hoist_level
                         })
                         .max()
-                        .unwrap_or(self.loop_stack.len());
+                        .unwrap_or(self.loop_stack.len().saturating_sub(1));
                     trace!(
                         " -> loop hoist level: {:?}; cur loop depth: {:?}, loop_stack: {:?}",
                         loop_hoist_level,
diff --git a/cranelift/filetests/filetests/egraph/licm.clif b/cranelift/filetests/filetests/egraph/licm.clif
index 7d44f53fe8..8d6a5ec329 100644
--- a/cranelift/filetests/filetests/egraph/licm.clif
+++ b/cranelift/filetests/filetests/egraph/licm.clif
@@ -36,3 +36,72 @@ block2(v9: i32):
 ; check:      v10 = iconst.i32 1
 ; check:      v4 = iadd.i32 v1, v10
 ; check:      return v4
+
+function %f(i64x2, i32) -> i64x2 {
+block0(v0: i64x2, v1: i32):
+    jump block1(v0, v1)
+
+block1(v2: i64x2, v3: i32):
+    v4 = vconst.i64x2 0x1000000010000000 ; no-argument constant defined inside the loop
+    v5 = iadd v2, v4
+    v6 = iconst.i32 1
+    v7 = isub v3, v6
+    brif v7, block1(v5, v7), block2(v5) ; loop back edge to block1, or exit to block2
+
+block2(v8: i64x2):
+    return v8
+}
+
+; check:  block0(v0: i64x2, v1: i32):
+; nextln:     v4 = vconst.i64x2 const0
+; nextln:     jump block1(v0, v1)
+; check:  block1(v2: i64x2, v3: i32):
+; check:      v6 = iconst.i32 1
+; check:      v7 = isub v3, v6
+; check:      v5 = iadd v2, v4
+; check:      v8 -> v5
+; check:      brif v7, block1(v5, v7), block2
+; check:  block2:
+; check:      return v5
+
+;; Don't hoist vconst out of both loops, only out of the inner loop, based on
+;; the current heuristic.
+function %f(i64x2, i32, i32) -> i64x2 {
+block0(v0: i64x2, v1: i32, v2: i32):
+    jump block1(v0, v1, v2)
+
+block1(v3: i64x2, v4: i32, v5: i32):
+    jump block2(v3, v4) ; outer loop header, enters the inner loop
+
+block2(v6: i64x2, v7: i32):
+    v8 = vconst.i64x2 0x1000000010000000 ; no-argument constant defined inside the inner loop
+    v9 = iadd v6, v8
+    v10 = iconst.i32 1
+    v11 = isub v7, v10
+    brif v11, block2(v9, v11), block3(v9) ; inner loop back edge to block2, or exit to block3
+
+block3(v12: i64x2):
+    v13 = iconst.i32 1
+    v14 = isub v5, v13
+    brif v14, block1(v9, v4, v14), block4 ; outer loop back edge to block1, or exit to block4
+
+block4:
+    return v12
+}
+
+; check:  block0(v0: i64x2, v1: i32, v2: i32):
+; check:      jump block1(v0, v2)
+; check:  block1(v3: i64x2, v5: i32):
+; check:      v8 = vconst.i64x2 const0
+; check:      jump block2(v3, v1)
+; check:  block2(v6: i64x2, v7: i32):
+; check:      v10 = iconst.i32 1
+; check:      v11 = isub v7, v10
+; check:      v9 = iadd v6, v8
+; check:      brif v11, block2(v9, v11), block3
+; check:  block3:
+; check:      v15 = iconst.i32 1
+; check:      v14 = isub.i32 v5, v15
+; check:      brif v14, block1(v9, v14), block4
+; check:  block4:
+; check:      return v9