diff --git a/cranelift/codegen/src/isa/aarch64/lower.rs b/cranelift/codegen/src/isa/aarch64/lower.rs
index e7853ca98e..039f3b3cd3 100644
--- a/cranelift/codegen/src/isa/aarch64/lower.rs
+++ b/cranelift/codegen/src/isa/aarch64/lower.rs
@@ -701,7 +701,7 @@ pub(crate) fn lower_pair_address<C: LowerCtx<I = Inst>>(
     // Collect addends through an arbitrary tree of 32-to-64-bit sign/zero
     // extends and addition ops. We update these as we consume address
     // components, so they represent the remaining addends not yet handled.
-    let (addends64, addends32, args_offset) = collect_address_addends(ctx, roots);
+    let (mut addends64, mut addends32, args_offset) = collect_address_addends(ctx, roots);
     let offset = args_offset + (offset as i64);
 
     trace!(
@@ -713,41 +713,40 @@ pub(crate) fn lower_pair_address<C: LowerCtx<I = Inst>>(
     // Pairs basically only have reg + imm formats so we only have to worry about those
-    let imm7_offset = SImm7Scaled::maybe_from_i64(offset, I64);
-    match (&addends64[..], &addends32[..], imm7_offset) {
-        (&[add64], &[], Some(offset)) => PairAMode::SignedOffset(add64, offset),
-        (&[], &[add32], Some(offset)) => {
-            let tmp = ctx.alloc_tmp(I64).only_reg().unwrap();
-            let (reg, extendop) = add32;
-            let signed = match extendop {
-                ExtendOp::SXTW => true,
-                ExtendOp::UXTW => false,
-                _ => unreachable!(),
-            };
-            ctx.emit(Inst::Extend {
-                rd: tmp,
-                rn: reg,
-                signed,
-                from_bits: 32,
-                to_bits: 64,
-            });
-            PairAMode::SignedOffset(tmp.to_reg(), offset)
-        }
-        (&[], &[], Some(offset)) => PairAMode::SignedOffset(zero_reg(), offset),
+    let base_reg = if let Some(reg64) = addends64.pop() {
+        reg64
+    } else if let Some((reg32, extendop)) = addends32.pop() {
+        let tmp = ctx.alloc_tmp(I64).only_reg().unwrap();
+        let signed = match extendop {
+            ExtendOp::SXTW => true,
+            ExtendOp::UXTW => false,
+            _ => unreachable!(),
+        };
+        ctx.emit(Inst::Extend {
+            rd: tmp,
+            rn: reg32,
+            signed,
+            from_bits: 32,
+            to_bits: 64,
+        });
+        tmp.to_reg()
+    } else {
+        zero_reg()
+    };
 
-        (_, _, _) => {
-            // This is the general case, we just grab all addends and sum them into a register
-            let addr = ctx.alloc_tmp(I64).only_reg().unwrap();
-            lower_add_addends(ctx, addr, addends64, addends32);
+    let addr = ctx.alloc_tmp(I64).only_reg().unwrap();
+    ctx.emit(Inst::gen_move(addr, base_reg, I64));
 
-            let imm7 = imm7_offset.unwrap_or_else(|| {
-                lower_add_immediate(ctx, addr, addr.to_reg(), offset);
-                SImm7Scaled::maybe_from_i64(0, I64).unwrap()
-            });
+    // We have the base register; if we have any other addends, add them in
+    lower_add_addends(ctx, addr, addends64, addends32);
 
-            PairAMode::SignedOffset(addr.to_reg(), imm7)
-        }
-    }
+    // Figure out what offset we should emit
+    let imm7 = SImm7Scaled::maybe_from_i64(offset, I64).unwrap_or_else(|| {
+        lower_add_immediate(ctx, addr, addr.to_reg(), offset);
+        SImm7Scaled::maybe_from_i64(0, I64).unwrap()
+    });
+
+    PairAMode::SignedOffset(addr.to_reg(), imm7)
 }
 
 /// Lower the address of a load or store.
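The rewrite above drops the special-cased match over addend patterns: every path now copies a base register into a fresh temporary, sums any remaining addends into it, and only then decides whether the offset fits the pair addressing mode. For I64 pairs, the SImm7Scaled immediate of ldp/stp is a signed 7-bit field scaled by the 8-byte access size, so only multiples of 8 in [-512, 504] encode directly; anything else is folded into the base register via lower_add_immediate and the ldp/stp then uses an offset of #0. A minimal sketch of that range check, using a hypothetical standalone helper rather than the real SImm7Scaled type:

    // Hypothetical mirror of what SImm7Scaled::maybe_from_i64(offset, I64)
    // accepts: a multiple of the 8-byte scale whose scaled value fits in a
    // signed 7-bit field, i.e. in [-64, 63].
    fn fits_simm7_scaled_i64(offset: i64) -> bool {
        const SCALE: i64 = 8; // size of one I64 register
        offset % SCALE == 0 && (-64..=63).contains(&(offset / SCALE))
    }

    fn main() {
        assert!(fits_simm7_scaled_i64(504)); // 63 * 8: largest directly encodable offset
        assert!(fits_simm7_scaled_i64(-512)); // -64 * 8: smallest directly encodable offset
        assert!(!fits_simm7_scaled_i64(512)); // out of range: takes the add fallback
    }

The amodes tests below pick offsets of exactly 504 and -512 because they sit on the edges of this window.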
diff --git a/cranelift/filetests/filetests/isa/aarch64/amodes.clif b/cranelift/filetests/filetests/isa/aarch64/amodes.clif
index 837ba1815f..fbab91d7f7 100644
--- a/cranelift/filetests/filetests/isa/aarch64/amodes.clif
+++ b/cranelift/filetests/filetests/isa/aarch64/amodes.clif
@@ -386,3 +386,130 @@ block0(v0: i64, v1: i64, v2: i64):
 ; nextln: ldrsh x0, [x0]
 ; nextln: ldp fp, lr, [sp], #16
 ; nextln: ret
+
+
+function %i128(i64) -> i128 {
+block0(v0: i64):
+    v1 = load.i128 v0
+    store.i128 v1, v0
+    return v1
+}
+
+; check: stp fp, lr, [sp, #-16]!
+; nextln: mov fp, sp
+; nextln: mov x1, x0
+; nextln: ldp x2, x1, [x1]
+; nextln: stp x2, x1, [x0]
+; nextln: mov x0, x2
+; nextln: ldp fp, lr, [sp], #16
+; nextln: ret
+
+
+function %i128_imm_offset(i64) -> i128 {
+block0(v0: i64):
+    v1 = load.i128 v0+16
+    store.i128 v1, v0+16
+    return v1
+}
+
+; check: stp fp, lr, [sp, #-16]!
+; nextln: mov fp, sp
+; nextln: mov x1, x0
+; nextln: ldp x2, x1, [x1, #16]
+; nextln: stp x2, x1, [x0, #16]
+; nextln: mov x0, x2
+; nextln: ldp fp, lr, [sp], #16
+; nextln: ret
+
+function %i128_imm_offset_large(i64) -> i128 {
+block0(v0: i64):
+    v1 = load.i128 v0+504
+    store.i128 v1, v0+504
+    return v1
+}
+
+; check: stp fp, lr, [sp, #-16]!
+; nextln: mov fp, sp
+; nextln: mov x1, x0
+; nextln: ldp x2, x1, [x1, #504]
+; nextln: stp x2, x1, [x0, #504]
+; nextln: mov x0, x2
+; nextln: ldp fp, lr, [sp], #16
+; nextln: ret
+
+function %i128_imm_offset_negative_large(i64) -> i128 {
+block0(v0: i64):
+    v1 = load.i128 v0-512
+    store.i128 v1, v0-512
+    return v1
+}
+
+; check: stp fp, lr, [sp, #-16]!
+; nextln: mov fp, sp
+; nextln: mov x1, x0
+; nextln: ldp x2, x1, [x1, #-512]
+; nextln: stp x2, x1, [x0, #-512]
+; nextln: mov x0, x2
+; nextln: ldp fp, lr, [sp], #16
+; nextln: ret
+
+
+function %i128_add_offset(i64) -> i128 {
+block0(v0: i64):
+    v1 = iadd_imm v0, 32
+    v2 = load.i128 v1
+    store.i128 v2, v1
+    return v2
+}
+
+; check: stp fp, lr, [sp, #-16]!
+; nextln: mov fp, sp
+; nextln: mov x1, x0
+; nextln: ldp x2, x1, [x1, #32]
+; nextln: stp x2, x1, [x0, #32]
+; nextln: mov x0, x2
+; nextln: ldp fp, lr, [sp], #16
+; nextln: ret
+
+
+function %i128_32bit_sextend_simple(i32) -> i128 {
+block0(v0: i32):
+    v1 = sextend.i64 v0
+    v2 = load.i128 v1
+    store.i128 v2, v1
+    return v2
+}
+
+; TODO: We should be able to deduplicate the sxtw instruction
+; check: stp fp, lr, [sp, #-16]!
+; nextln: mov fp, sp
+; nextln: sxtw x1, w0
+; nextln: ldp x2, x1, [x1]
+; nextln: sxtw x0, w0
+; nextln: stp x2, x1, [x0]
+; nextln: mov x0, x2
+; nextln: ldp fp, lr, [sp], #16
+; nextln: ret
+
+
+function %i128_32bit_sextend(i64, i32) -> i128 {
+block0(v0: i64, v1: i32):
+    v2 = sextend.i64 v1
+    v3 = iadd.i64 v0, v2
+    v4 = iadd_imm.i64 v3, 24
+    v5 = load.i128 v4
+    store.i128 v5, v4
+    return v5
+}
+
+; check: stp fp, lr, [sp, #-16]!
+; nextln: mov fp, sp
+; nextln: mov x2, x0
+; nextln: add x2, x2, x1, SXTW
+; nextln: ldp x3, x2, [x2, #24]
+; nextln: add x0, x0, x1, SXTW
+; nextln: stp x3, x2, [x0, #24]
+; nextln: mov x0, x3
+; nextln: mov x1, x2
+; nextln: ldp fp, lr, [sp], #16
+; nextln: ret
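Two things are worth noting in the expected code above. The mov x1, x0 copies and the duplicated sxtw both come from the lowering now unconditionally moving the base into a fresh temporary (Inst::gen_move) before adding any remaining addends; the TODO in %i128_32bit_sextend_simple records the redundant extend as a known cleanup, not a correctness problem. Also, none of these tests use an offset outside the SImm7Scaled window, so none exercise the fallback in which lower_add_immediate folds the offset into the scratch register and the ldp/stp is emitted with #0.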
diff --git a/cranelift/filetests/filetests/runtests/i128-load-store.clif b/cranelift/filetests/filetests/runtests/i128-load-store.clif
index b02e8ec26b..41046e8717 100644
--- a/cranelift/filetests/filetests/runtests/i128-load-store.clif
+++ b/cranelift/filetests/filetests/runtests/i128-load-store.clif
@@ -89,3 +89,48 @@ block0(v0: i64,v1: i64):
 ; run: %i128_stack_store_load_big_offset(0x01234567_89ABCDEF, 0xFEDCBA98_76543210) == true
 ; run: %i128_stack_store_load_big_offset(0x06060606_06060606, 0xA00A00A0_0A00A00A) == true
 ; run: %i128_stack_store_load_big_offset(0xC0FFEEEE_DECAFFFF, 0xDECAFFFF_C0FFEEEE) == true
+
+
+
+function %i128_store_load(i64, i64) -> b1 {
+    ss0 = explicit_slot 16
+
+block0(v0: i64,v1: i64):
+    v2 = iconcat v0, v1
+
+    v3 = stack_addr.i64 ss0
+    store.i128 v2, v3
+    v4 = load.i128 v3
+
+    v5 = icmp.i128 eq v2, v4
+    return v5
+}
+; run: %i128_store_load(0, 0) == true
+; run: %i128_store_load(-1, -1) == true
+; run: %i128_store_load(-1, 0) == true
+; run: %i128_store_load(0, -1) == true
+; run: %i128_store_load(0x01234567_89ABCDEF, 0xFEDCBA98_76543210) == true
+; run: %i128_store_load(0x06060606_06060606, 0xA00A00A0_0A00A00A) == true
+; run: %i128_store_load(0xC0FFEEEE_DECAFFFF, 0xDECAFFFF_C0FFEEEE) == true
+
+
+function %i128_store_load_offset(i64, i64) -> b1 {
+    ss0 = explicit_slot 32
+
+block0(v0: i64,v1: i64):
+    v2 = iconcat v0, v1
+
+    v3 = stack_addr.i64 ss0
+    store.i128 v2, v3+16
+    v4 = load.i128 v3+16
+
+    v5 = icmp.i128 eq v2, v4
+    return v5
+}
+; run: %i128_store_load_offset(0, 0) == true
+; run: %i128_store_load_offset(-1, -1) == true
+; run: %i128_store_load_offset(-1, 0) == true
+; run: %i128_store_load_offset(0, -1) == true
+; run: %i128_store_load_offset(0x01234567_89ABCDEF, 0xFEDCBA98_76543210) == true
+; run: %i128_store_load_offset(0x06060606_06060606, 0xA00A00A0_0A00A00A) == true
+; run: %i128_store_load_offset(0xC0FFEEEE_DECAFFFF, 0xDECAFFFF_C0FFEEEE) == true
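The new run-tests check end to end that an i128 stored through the new pair amode reads back unchanged, both at offset 0 and at an offset that still fits the immediate. The property they assert, restated as plain Rust for illustration (not code from the patch; iconcat takes the low half as its first operand):

    fn main() {
        // A subset of the (low, high) pairs fed to iconcat by the run lines above.
        let patterns: [(u64, u64); 4] = [
            (0, 0),
            (u64::MAX, u64::MAX),
            (0x01234567_89ABCDEF, 0xFEDCBA98_76543210),
            (0xC0FFEEEE_DECAFFFF, 0xDECAFFFF_C0FFEEEE),
        ];
        for (lo, hi) in patterns {
            let v = (lo as u128) | ((hi as u128) << 64); // v2 = iconcat v0, v1
            let mut slot = [0u8; 16]; // stands in for the 16-byte stack slot
            slot.copy_from_slice(&v.to_ne_bytes()); // store.i128 v2, v3
            assert_eq!(u128::from_ne_bytes(slot), v); // v4 = load.i128 v3; icmp eq
        }
    }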