diff --git a/cranelift/codegen/src/isa/aarch64/inst.isle b/cranelift/codegen/src/isa/aarch64/inst.isle index f035789c1f..8d182a92de 100644 --- a/cranelift/codegen/src/isa/aarch64/inst.isle +++ b/cranelift/codegen/src/isa/aarch64/inst.isle @@ -466,14 +466,6 @@ (mem PairAMode) (flags MemFlags)) - (LoadFpuConst64 - (rd WritableReg) - (const_data u64)) - - (LoadFpuConst128 - (rd WritableReg) - (const_data u128)) - ;; Conversion: FP -> integer. (FpuToInt (op FpuToIntOp) @@ -1135,6 +1127,11 @@ (off i64) (ty Type)) + ;; A reference to a constant which is placed outside of the function's + ;; body, typically at the end. + (Const + (addr VCodeConstant)) + ;; Offset from the "nominal stack pointer", which is where the real SP is ;; just after stack and spill slots are allocated in the function prologue. ;; At emission time, this is converted to `SPOffset` with a fixup added to @@ -1194,6 +1191,16 @@ (rule (lane_size (dynamic_lane 32 _)) (ScalarSize.Size32)) (rule (lane_size (dynamic_lane 64 _)) (ScalarSize.Size64)) +;; Helper for extracting the size of a lane from the input `VectorSize` +(decl pure vector_lane_size (VectorSize) ScalarSize) +(rule (vector_lane_size (VectorSize.Size8x16)) (ScalarSize.Size8)) +(rule (vector_lane_size (VectorSize.Size8x8)) (ScalarSize.Size8)) +(rule (vector_lane_size (VectorSize.Size16x8)) (ScalarSize.Size16)) +(rule (vector_lane_size (VectorSize.Size16x4)) (ScalarSize.Size16)) +(rule (vector_lane_size (VectorSize.Size32x4)) (ScalarSize.Size32)) +(rule (vector_lane_size (VectorSize.Size32x2)) (ScalarSize.Size32)) +(rule (vector_lane_size (VectorSize.Size64x2)) (ScalarSize.Size64)) + (type Cond extern (enum (Eq) @@ -1908,6 +1915,13 @@ (_ Unit (emit (MInst.VecDupFromFpu dst src size lane)))) dst)) +;; Helper for emitting `MInst.VecDupImm` instructions. +(decl vec_dup_imm (ASIMDMovModImm bool VectorSize) Reg) +(rule (vec_dup_imm imm invert size) + (let ((dst WritableReg (temp_writable_reg $I8X16)) + (_ Unit (emit (MInst.VecDupImm dst imm invert size)))) + dst)) + ;; Helper for emitting `MInst.AluRRImm12` instructions. (decl alu_rr_imm12 (ALUOp Type Reg Imm12) Reg) (rule (alu_rr_imm12 op ty src imm) @@ -2158,6 +2172,13 @@ (_ Unit (emit (MInst.MovToFpu dst x size)))) dst)) +;; Helper for emitting `MInst.FpuMoveFPImm` instructions. +(decl fpu_move_fp_imm (ASIMDFPModImm ScalarSize) Reg) +(rule (fpu_move_fp_imm imm size) + (let ((dst WritableReg (temp_writable_reg $I8X16)) + (_ Unit (emit (MInst.FpuMoveFPImm dst imm size)))) + dst)) + ;; Helper for emitting `MInst.MovToVec` instructions. (decl mov_to_vec (Reg Reg u8 VectorSize) Reg) (rule (mov_to_vec src1 src2 lane size) @@ -2986,24 +3007,122 @@ (amode ty addr offset))) ;; Lower a constant f32. -(decl constant_f32 (u64) Reg) -;; TODO: Port lower_constant_f32() to ISLE. -(extern constructor constant_f32 constant_f32) +;; +;; Note that we must make sure that all bits outside the lowest 32 are set to 0 +;; because this function is also used to load wider constants (that have zeros +;; in their most significant bits). +(decl constant_f32 (u32) Reg) +(rule 2 (constant_f32 0) + (vec_dup_imm (asimd_mov_mod_imm_zero (ScalarSize.Size32)) + $false + (VectorSize.Size32x2))) +(rule 1 (constant_f32 n) + (if-let imm (asimd_fp_mod_imm_from_u64 n (ScalarSize.Size32))) + (fpu_move_fp_imm imm (ScalarSize.Size32))) +(rule (constant_f32 n) + (mov_to_fpu (imm $I32 (ImmExtend.Zero) n) (ScalarSize.Size32))) ;; Lower a constant f64. 
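Review note on the three `constant_f32` rules above (the `constant_f64` rules continue below): the tiers are (1) zero via a `movi` of an `asimd_mov_mod_imm_zero` immediate, (2) an 8-bit `fmov` floating-point immediate when `asimd_fp_mod_imm_from_u64` succeeds, and (3) materializing the bits in a GPR and moving them over with `fmov`. As a rough standalone sketch of tier (2): the encodable values are +/-n/16 * 2^e with 16 <= n <= 31 and -3 <= e <= 4. `is_fmov_imm_f32` below is an illustrative name, not the actual `ASIMDFPModImm::maybe_from_u64` implementation:

    /// Returns true if `bits` (an f32 bit pattern) looks encodable as an AArch64
    /// `fmov` 8-bit immediate: the low 19 fraction bits must be zero, exponent
    /// bits 29..=25 must all be equal, and bit 30 must be their complement.
    fn is_fmov_imm_f32(bits: u32) -> bool {
        let low_frac_zero = (bits & 0x0007_ffff) == 0;
        let b = (bits >> 25) & 0x1f; // the five replicated exponent bits
        let replicated = b == 0 || b == 0x1f;
        let complemented = ((bits >> 30) & 1) != (b & 1);
        low_frac_zero && replicated && complemented
    }

    fn main() {
        assert!(is_fmov_imm_f32(1.0f32.to_bits()));     // encodable: the fmov rule fires
        assert!(is_fmov_imm_f32((-0.5f32).to_bits()));  // encodable
        assert!(!is_fmov_imm_f32(0.1f32.to_bits()));    // falls through to the GPR rule
        assert!(!is_fmov_imm_f32(0.0f32.to_bits()));    // zero is handled by the movi rule
    }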
+;; +;; Note that we must make sure that all bits outside the lowest 64 are set to 0 +;; because this function is also used to load wider constants (that have zeros +;; in their most significant bits). +;; TODO: Treat as half of a 128 bit vector and consider replicated patterns. +;; Scalar MOVI might also be an option. (decl constant_f64 (u64) Reg) -;; TODO: Port lower_constant_f64() to ISLE. -(extern constructor constant_f64 constant_f64) +(rule 4 (constant_f64 0) + (vec_dup_imm (asimd_mov_mod_imm_zero (ScalarSize.Size32)) + $false + (VectorSize.Size32x2))) +(rule 3 (constant_f64 n) + (if-let imm (asimd_fp_mod_imm_from_u64 n (ScalarSize.Size64))) + (fpu_move_fp_imm imm (ScalarSize.Size64))) +(rule 2 (constant_f64 (u64_as_u32 n)) + (constant_f32 n)) +(rule 1 (constant_f64 (u64_low32_bits_unset n)) + (mov_to_fpu (imm $I64 (ImmExtend.Zero) n) (ScalarSize.Size64))) +(rule (constant_f64 n) + (fpu_load64 (AMode.Const (emit_u64_le_const n)) (mem_flags_trusted))) + +;; Tests whether the low 32 bits in the input are all zero. +(decl u64_low32_bits_unset (u64) u64) +(extern extractor u64_low32_bits_unset u64_low32_bits_unset) ;; Lower a constant f128. (decl constant_f128 (u128) Reg) -;; TODO: Port lower_constant_f128() to ISLE. -(extern constructor constant_f128 constant_f128) +(rule 3 (constant_f128 0) + (vec_dup_imm (asimd_mov_mod_imm_zero (ScalarSize.Size8)) + $false + (VectorSize.Size8x16))) + +;; If the upper 64-bits are all zero then defer to `constant_f64`. +(rule 2 (constant_f128 (u128_as_u64 n)) (constant_f64 n)) + +;; If the low half of the u128 equals the high half then delegate to the splat +;; logic as a splat of a 64-bit value. +(rule 1 (constant_f128 (u128_replicated_u64 n)) + (splat_const n (VectorSize.Size64x2))) + +;; Base case is to load the constant from memory. +(rule (constant_f128 n) + (fpu_load128 (AMode.Const (emit_u128_le_const n)) (mem_flags_trusted))) ;; Lower a vector splat with a constant parameter. +;; +;; The 64-bit input here only uses the low bits for the lane size in +;; `VectorSize` and all other bits are ignored. (decl splat_const (u64 VectorSize) Reg) -;; TODO: Port lower_splat_const() to ISLE. -(extern constructor splat_const splat_const) + +;; If the splat'd constant can itself be reduced in size then attempt to do so +;; as it will make it easier to create the immediates in the instructions below. +(rule 5 (splat_const (u64_replicated_u32 n) (VectorSize.Size64x2)) + (splat_const n (VectorSize.Size32x4))) +(rule 5 (splat_const (u32_replicated_u16 n) (VectorSize.Size32x4)) + (splat_const n (VectorSize.Size16x8))) +(rule 5 (splat_const (u32_replicated_u16 n) (VectorSize.Size32x2)) + (splat_const n (VectorSize.Size16x4))) +(rule 5 (splat_const (u16_replicated_u8 n) (VectorSize.Size16x8)) + (splat_const n (VectorSize.Size8x16))) +(rule 5 (splat_const (u16_replicated_u8 n) (VectorSize.Size16x4)) + (splat_const n (VectorSize.Size8x8))) + +;; Special cases for `vec_dup_imm` instructions where the input is either +;; negated or not. 
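Review note on the two `vec_dup_imm` rules that follow: the `invert` flag selects `mvni` rather than `movi`, so a lane value whose bitwise NOT is encodable as an `ASIMDMovModImm` still splats in a single instruction. A small illustration using only the simplest 32-bit form (an 8-bit value shifted into one byte of the lane); `movi_shifted_imm8` is an illustrative stand-in for the richer `ASIMDMovModImm::maybe_from_u64`:

    /// Return (imm8, shift) if `lane` is an 8-bit value shifted by 0, 8, 16 or
    /// 24 bits, i.e. one of the MOVI encodings for a 32-bit lane.
    fn movi_shifted_imm8(lane: u32) -> Option<(u8, u32)> {
        for shift in [0u32, 8, 16, 24] {
            if (lane & !(0xffu32 << shift)) == 0 {
                return Some(((lane >> shift) as u8, shift));
            }
        }
        None
    }

    fn main() {
        // 0xffff_ff00 is not directly encodable in this form...
        assert_eq!(movi_shifted_imm8(0xffff_ff00), None);
        // ...but its bitwise NOT is, so the inverted rule can splat it with a
        // single `mvni` instead of a GPR load plus `dup`.
        assert_eq!(movi_shifted_imm8(!0xffff_ff00u32), Some((0xff, 0)));
    }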
+(rule 4 (splat_const n size) + (if-let imm (asimd_mov_mod_imm_from_u64 n (vector_lane_size size))) + (vec_dup_imm imm $false size)) +(rule 3 (splat_const n size) + (if-let imm (asimd_mov_mod_imm_from_u64 (u64_not n) (vector_lane_size size))) + (vec_dup_imm imm $true size)) + +;; Special case a 32-bit splat where an immediate can be created by +;; concatenating the 32-bit constant into a 64-bit value +(rule 2 (splat_const n (VectorSize.Size32x4)) + (if-let imm (asimd_mov_mod_imm_from_u64 (u64_or n (u64_shl n 32)) (ScalarSize.Size64))) + (vec_dup_imm imm $false (VectorSize.Size64x2))) +(rule 2 (splat_const n (VectorSize.Size32x2)) + (if-let imm (asimd_mov_mod_imm_from_u64 (u64_or n (u64_shl n 32)) (ScalarSize.Size64))) + (fpu_extend (vec_dup_imm imm $false (VectorSize.Size64x2)) (ScalarSize.Size64))) + +(rule 1 (splat_const n size) + (if-let imm (asimd_fp_mod_imm_from_u64 n (vector_lane_size size))) + (vec_dup_fp_imm imm size)) + +;; The base case for splat is to use `vec_dup` with the immediate loaded into a +;; register. +(rule (splat_const n size) + (vec_dup (imm $I64 (ImmExtend.Zero) n) size)) + +;; Each of these extractors tests whether the upper half of the input equals the +;; lower half of the input +(decl u128_replicated_u64 (u64) u128) +(extern extractor u128_replicated_u64 u128_replicated_u64) +(decl u64_replicated_u32 (u64) u64) +(extern extractor u64_replicated_u32 u64_replicated_u32) +(decl u32_replicated_u16 (u64) u64) +(extern extractor u32_replicated_u16 u32_replicated_u16) +(decl u16_replicated_u8 (u64) u64) +(extern extractor u16_replicated_u8 u16_replicated_u8) ;; Lower a FloatCC to a Cond. (decl fp_cond_code (FloatCC) Cond) @@ -3814,3 +3933,36 @@ ;; Helper for emitting the `trn2` instruction (decl vec_trn2 (Reg Reg VectorSize) Reg) (rule (vec_trn2 rn rm size) (vec_rrr (VecALUOp.Trn2) rn rm size)) + +;; Helper for creating a zero value `ASIMDMovModImm` immediate. +(decl asimd_mov_mod_imm_zero (ScalarSize) ASIMDMovModImm) +(extern constructor asimd_mov_mod_imm_zero asimd_mov_mod_imm_zero) + +;; Helper for fallibly creating an `ASIMDMovModImm` immediate from its parts. +(decl pure partial asimd_mov_mod_imm_from_u64 (u64 ScalarSize) ASIMDMovModImm) +(extern constructor asimd_mov_mod_imm_from_u64 asimd_mov_mod_imm_from_u64) + +;; Helper for fallibly creating an `ASIMDFPModImm` immediate from its parts. 
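Review note on the `u*_replicated_u*` extractors declared just above (their Rust implementations appear in `lower/isle.rs` later in this patch): the priority-5 `splat_const` rules repeatedly narrow the splatted value while its two halves are equal, which maximizes the chance that one of the immediate forms applies. A standalone sketch of that narrowing; `narrow_splat` is an illustrative name, not a helper in the patch:

    /// Narrow a splatted value while its upper half equals its lower half,
    /// mirroring u128_replicated_u64 / u64_replicated_u32 / u32_replicated_u16 /
    /// u16_replicated_u8, and report the final (value, lane_bits) pair.
    fn narrow_splat(value: u64, mut lane_bits: u32) -> (u64, u32) {
        let mut value = value & (u64::MAX >> (64 - lane_bits));
        while lane_bits > 8 {
            let half = lane_bits / 2;
            let low = value & (u64::MAX >> (64 - half));
            if (value >> half) != low {
                break;
            }
            value = low;
            lane_bits = half;
        }
        (value, lane_bits)
    }

    fn main() {
        // A 64-bit splat of 0x1234_1234_1234_1234 is really a 16-bit splat of
        // 0x1234 (it cannot narrow to 8 bits because 0x12 != 0x34).
        assert_eq!(narrow_splat(0x1234_1234_1234_1234, 64), (0x1234, 16));
        // 0x0000_00ff per 32-bit lane stays 32-bit: its 16-bit halves differ.
        assert_eq!(narrow_splat(0x0000_00ff, 32), (0xff, 32));
    }

(The `asimd_fp_mod_imm_from_u64` declaration for the helper comment just above continues below.)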
+(decl pure partial asimd_fp_mod_imm_from_u64 (u64 ScalarSize) ASIMDFPModImm) +(extern constructor asimd_fp_mod_imm_from_u64 asimd_fp_mod_imm_from_u64) + +;; Helper for creating a `VecDupFPImm` instruction +(decl vec_dup_fp_imm (ASIMDFPModImm VectorSize) Reg) +(rule (vec_dup_fp_imm imm size) + (let ((dst WritableReg (temp_writable_reg $I8X16)) + (_ Unit (emit (MInst.VecDupFPImm dst imm size)))) + dst)) + +;; Helper for creating a `FpuLoad64` instruction +(decl fpu_load64 (AMode MemFlags) Reg) +(rule (fpu_load64 amode flags) + (let ((dst WritableReg (temp_writable_reg $I8X16)) + (_ Unit (emit (MInst.FpuLoad64 dst amode flags)))) + dst)) + +;; Helper for creating a `FpuLoad128` instruction +(decl fpu_load128 (AMode MemFlags) Reg) +(rule (fpu_load128 amode flags) + (let ((dst WritableReg (temp_writable_reg $I8X16)) + (_ Unit (emit (MInst.FpuLoad128 dst amode flags)))) + dst)) diff --git a/cranelift/codegen/src/isa/aarch64/inst/args.rs b/cranelift/codegen/src/isa/aarch64/inst/args.rs index 69eb7e5251..1c29591ba2 100644 --- a/cranelift/codegen/src/isa/aarch64/inst/args.rs +++ b/cranelift/codegen/src/isa/aarch64/inst/args.rs @@ -124,6 +124,9 @@ pub enum MemLabel { /// offset from this instruction. This form must be used at emission time; /// see `memlabel_finalize()` for how other forms are lowered to this one. PCRel(i32), + /// An address that refers to a label within a `MachBuffer`, for example a + /// constant that lives in the pool at the end of the function. + Mach(MachLabel), } impl AMode { @@ -194,6 +197,7 @@ impl AMode { | &AMode::FPOffset { .. } | &AMode::SPOffset { .. } | &AMode::NominalSPOffset { .. } + | &AMode::Const { .. } | AMode::Label { .. } => self.clone(), } } @@ -382,7 +386,8 @@ impl PrettyPrint for ExtendOp { impl PrettyPrint for MemLabel { fn pretty_print(&self, _: u8, _: &mut AllocationConsumer<'_>) -> String { match self { - &MemLabel::PCRel(off) => format!("pc+{}", off), + MemLabel::PCRel(off) => format!("pc+{}", off), + MemLabel::Mach(off) => format!("label({})", off.get()), } } } @@ -465,6 +470,8 @@ impl PrettyPrint for AMode { let simm9 = simm9.pretty_print(8, allocs); format!("[sp], {}", simm9) } + AMode::Const { addr } => format!("[const({})]", addr.as_u32()), + // Eliminated by `mem_finalize()`. &AMode::SPOffset { .. } | &AMode::FPOffset { .. } diff --git a/cranelift/codegen/src/isa/aarch64/inst/emit.rs b/cranelift/codegen/src/isa/aarch64/inst/emit.rs index 2332ff19ba..4f0288dc2b 100644 --- a/cranelift/codegen/src/isa/aarch64/inst/emit.rs +++ b/cranelift/codegen/src/isa/aarch64/inst/emit.rs @@ -2,7 +2,7 @@ use regalloc2::Allocation; -use crate::binemit::{CodeOffset, Reloc, StackMap}; +use crate::binemit::{Reloc, StackMap}; use crate::ir::{types::*, RelSourceLoc}; use crate::ir::{LibCall, MemFlags, TrapCode}; use crate::isa::aarch64::inst::*; @@ -10,20 +10,12 @@ use crate::machinst::{ty_bits, Reg, RegClass, Writable}; use crate::trace; use core::convert::TryFrom; -/// Memory label/reference finalization: convert a MemLabel to a PC-relative -/// offset, possibly emitting relocation(s) as necessary. -pub fn memlabel_finalize(_insn_off: CodeOffset, label: &MemLabel) -> i32 { - match label { - &MemLabel::PCRel(rel) => rel, - } -} - /// Memory addressing mode finalization: convert "special" modes (e.g., /// generic arbitrary stack offset) into real addressing modes, possibly by /// emitting some helper instructions that come immediately before the use /// of this amode. 
pub fn mem_finalize( - insn_off: CodeOffset, + sink: Option<&mut MachBuffer>, mem: &AMode, state: &EmitState, ) -> (SmallVec<[Inst; 4]>, AMode) { @@ -74,14 +66,14 @@ pub fn mem_finalize( } } - &AMode::Label { ref label } => { - let off = memlabel_finalize(insn_off, label); - ( - smallvec![], - AMode::Label { - label: MemLabel::PCRel(off), - }, - ) + AMode::Const { addr } => { + let sink = match sink { + Some(sink) => sink, + None => return (smallvec![], mem.clone()), + }; + let label = sink.get_label_for_constant(*addr); + let label = MemLabel::Mach(label); + (smallvec![], AMode::Label { label }) } _ => (smallvec![], mem.clone()), @@ -959,7 +951,7 @@ impl MachInstEmit for Inst { | &Inst::FpuLoad128 { rd, ref mem, flags } => { let rd = allocs.next_writable(rd); let mem = mem.with_allocs(&mut allocs); - let (mem_insts, mem) = mem_finalize(sink.cur_offset(), &mem, state); + let (mem_insts, mem) = mem_finalize(Some(sink), &mem, state); for inst in mem_insts.into_iter() { inst.emit(&[], sink, emit_info, state); @@ -1039,7 +1031,19 @@ impl MachInstEmit for Inst { &AMode::Label { ref label } => { let offset = match label { // cast i32 to u32 (two's-complement) - &MemLabel::PCRel(off) => off as u32, + MemLabel::PCRel(off) => *off as u32, + // Emit a relocation into the `MachBuffer` + // for the label that's being loaded from and + // encode an address of 0 in its place which will + // get filled in by relocation resolution later on. + MemLabel::Mach(label) => { + sink.use_label_at_offset( + sink.cur_offset(), + *label, + LabelUse::Ldr19, + ); + 0 + } } / 4; assert!(offset < (1 << 19)); match self { @@ -1076,6 +1080,7 @@ impl MachInstEmit for Inst { &AMode::SPOffset { .. } | &AMode::FPOffset { .. } | &AMode::NominalSPOffset { .. } + | &AMode::Const { .. } | &AMode::RegOffset { .. } => { panic!("Should not see {:?} here!", mem) } @@ -1091,7 +1096,7 @@ impl MachInstEmit for Inst { | &Inst::FpuStore128 { rd, ref mem, flags } => { let rd = allocs.next(rd); let mem = mem.with_allocs(&mut allocs); - let (mem_insts, mem) = mem_finalize(sink.cur_offset(), &mem, state); + let (mem_insts, mem) = mem_finalize(Some(sink), &mem, state); for inst in mem_insts.into_iter() { inst.emit(&[], sink, emit_info, state); @@ -1172,6 +1177,7 @@ impl MachInstEmit for Inst { &AMode::SPOffset { .. } | &AMode::FPOffset { .. } | &AMode::NominalSPOffset { .. } + | &AMode::Const { .. } | &AMode::RegOffset { .. 
} => { panic!("Should not see {:?} here!", mem) } @@ -2319,41 +2325,6 @@ impl MachInstEmit for Inst { }; sink.put4(enc_inttofpu(top16, rd, rn)); } - &Inst::LoadFpuConst64 { rd, const_data } => { - let rd = allocs.next_writable(rd); - let inst = Inst::FpuLoad64 { - rd, - mem: AMode::Label { - label: MemLabel::PCRel(8), - }, - flags: MemFlags::trusted(), - }; - inst.emit(&[], sink, emit_info, state); - let inst = Inst::Jump { - dest: BranchTarget::ResolvedOffset(12), - }; - inst.emit(&[], sink, emit_info, state); - sink.put8(const_data); - } - &Inst::LoadFpuConst128 { rd, const_data } => { - let rd = allocs.next_writable(rd); - let inst = Inst::FpuLoad128 { - rd, - mem: AMode::Label { - label: MemLabel::PCRel(8), - }, - flags: MemFlags::trusted(), - }; - inst.emit(&[], sink, emit_info, state); - let inst = Inst::Jump { - dest: BranchTarget::ResolvedOffset(20), - }; - inst.emit(&[], sink, emit_info, state); - - for i in const_data.to_le_bytes().iter() { - sink.put1(*i); - } - } &Inst::FpuCSel32 { rd, rn, rm, cond } => { let rd = allocs.next_writable(rd); let rn = allocs.next(rn); @@ -3350,7 +3321,7 @@ impl MachInstEmit for Inst { &Inst::LoadAddr { rd, ref mem } => { let rd = allocs.next_writable(rd); let mem = mem.with_allocs(&mut allocs); - let (mem_insts, mem) = mem_finalize(sink.cur_offset(), &mem, state); + let (mem_insts, mem) = mem_finalize(Some(sink), &mem, state); for inst in mem_insts.into_iter() { inst.emit(&[], sink, emit_info, state); } diff --git a/cranelift/codegen/src/isa/aarch64/inst/emit_tests.rs b/cranelift/codegen/src/isa/aarch64/inst/emit_tests.rs index e06221836b..c0ba8ce1b1 100644 --- a/cranelift/codegen/src/isa/aarch64/inst/emit_tests.rs +++ b/cranelift/codegen/src/isa/aarch64/inst/emit_tests.rs @@ -6891,24 +6891,6 @@ fn test_aarch64_binemit() { "stp q18, q22, [sp], #304", )); - insns.push(( - Inst::LoadFpuConst64 { - rd: writable_vreg(16), - const_data: 1.0_f64.to_bits(), - }, - "5000005C03000014000000000000F03F", - "ldr d16, pc+8 ; b 12 ; data.f64 1", - )); - - insns.push(( - Inst::LoadFpuConst128 { - rd: writable_vreg(5), - const_data: 0x0f0e0d0c0b0a09080706050403020100, - }, - "4500009C05000014000102030405060708090A0B0C0D0E0F", - "ldr q5, pc+8 ; b 20 ; data.f128 0x0f0e0d0c0b0a09080706050403020100", - )); - insns.push(( Inst::FpuCSel32 { rd: writable_vreg(1), diff --git a/cranelift/codegen/src/isa/aarch64/inst/mod.rs b/cranelift/codegen/src/isa/aarch64/inst/mod.rs index 6e8e602de2..22487b264e 100644 --- a/cranelift/codegen/src/isa/aarch64/inst/mod.rs +++ b/cranelift/codegen/src/isa/aarch64/inst/mod.rs @@ -10,7 +10,6 @@ use crate::{settings, CodegenError, CodegenResult}; use crate::machinst::{PrettyPrint, Reg, RegClass, Writable}; use alloc::vec::Vec; -use core::convert::TryFrom; use regalloc2::{PRegSet, VReg}; use smallvec::{smallvec, SmallVec}; use std::string::{String, ToString}; @@ -250,215 +249,6 @@ impl Inst { } } - /// Create instructions that load a 32-bit floating-point constant. - pub fn load_fp_constant32 Writable>( - rd: Writable, - const_data: u32, - mut alloc_tmp: F, - ) -> SmallVec<[Inst; 4]> { - // Note that we must make sure that all bits outside the lowest 32 are set to 0 - // because this function is also used to load wider constants (that have zeros - // in their most significant bits). 
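Review note on the removals above (the deleted `load_fp_constant*` helpers continue below): the old scheme placed each constant inline in the instruction stream, as an `ldr` from `pc+8`, a branch over the data, and the literal bytes, i.e. 16 bytes of code per f64 use and 24 per f128. With `AMode::Const` the load is a single `ldr` against a `MachLabel` for a `VCodeConstant` placed after the function body, and the 19-bit PC-relative field is filled in by normal label resolution (`LabelUse::Ldr19` above). A sketch of how such a fixup patches the instruction word; `patch_ldr_literal` is illustrative, not the actual `LabelUse::patch` code:

    /// Patch the 19-bit word offset of an `ldr` (literal): the field occupies
    /// bits 5..=23 of the instruction word.
    fn patch_ldr_literal(insn: u32, delta_bytes: i64) -> u32 {
        assert_eq!(delta_bytes % 4, 0, "ldr literal offsets are word multiples");
        let imm19 = delta_bytes / 4;
        assert!(imm19 >= -(1 << 18) && imm19 < (1 << 18), "out of +/-1 MiB range");
        (insn & !(0x7ffff << 5)) | (((imm19 as u32) & 0x7ffff) << 5)
    }

    fn main() {
        // `ldr q3, #0` is emitted for MemLabel::Mach with a zero offset field...
        let unpatched = 0x9c000003;
        // ...and later patched to point 0x20 bytes ahead: `ldr q3, #0x20`.
        assert_eq!(patch_ldr_literal(unpatched, 0x20), 0x9c000103);
    }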
- if const_data == 0 { - smallvec![Inst::VecDupImm { - rd, - imm: ASIMDMovModImm::zero(ScalarSize::Size32), - invert: false, - size: VectorSize::Size32x2, - }] - } else if let Some(imm) = - ASIMDFPModImm::maybe_from_u64(const_data.into(), ScalarSize::Size32) - { - smallvec![Inst::FpuMoveFPImm { - rd, - imm, - size: ScalarSize::Size32, - }] - } else { - let tmp = alloc_tmp(I32); - let mut insts = Inst::load_constant(tmp, const_data as u64, &mut alloc_tmp); - - insts.push(Inst::MovToFpu { - rd, - rn: tmp.to_reg(), - size: ScalarSize::Size32, - }); - - insts - } - } - - /// Create instructions that load a 64-bit floating-point constant. - pub fn load_fp_constant64 Writable>( - rd: Writable, - const_data: u64, - mut alloc_tmp: F, - ) -> SmallVec<[Inst; 4]> { - // Note that we must make sure that all bits outside the lowest 64 are set to 0 - // because this function is also used to load wider constants (that have zeros - // in their most significant bits). - // TODO: Treat as half of a 128 bit vector and consider replicated patterns. - // Scalar MOVI might also be an option. - if const_data == 0 { - smallvec![Inst::VecDupImm { - rd, - imm: ASIMDMovModImm::zero(ScalarSize::Size32), - invert: false, - size: VectorSize::Size32x2, - }] - } else if let Some(imm) = ASIMDFPModImm::maybe_from_u64(const_data, ScalarSize::Size64) { - smallvec![Inst::FpuMoveFPImm { - rd, - imm, - size: ScalarSize::Size64, - }] - } else if let Ok(const_data) = u32::try_from(const_data) { - Inst::load_fp_constant32(rd, const_data, alloc_tmp) - } else if const_data & (u32::MAX as u64) == 0 { - let tmp = alloc_tmp(I64); - let mut insts = Inst::load_constant(tmp, const_data, &mut alloc_tmp); - - insts.push(Inst::MovToFpu { - rd, - rn: tmp.to_reg(), - size: ScalarSize::Size64, - }); - - insts - } else { - smallvec![Inst::LoadFpuConst64 { rd, const_data }] - } - } - - /// Create instructions that load a 128-bit vector constant. - pub fn load_fp_constant128 Writable>( - rd: Writable, - const_data: u128, - alloc_tmp: F, - ) -> SmallVec<[Inst; 5]> { - if let Ok(const_data) = u64::try_from(const_data) { - SmallVec::from(&Inst::load_fp_constant64(rd, const_data, alloc_tmp)[..]) - } else if let Some((pattern, size)) = - Inst::get_replicated_vector_pattern(const_data, ScalarSize::Size64) - { - Inst::load_replicated_vector_pattern( - rd, - pattern, - VectorSize::from_lane_size(size, true), - alloc_tmp, - ) - } else { - smallvec![Inst::LoadFpuConst128 { rd, const_data }] - } - } - - /// Determine whether a 128-bit constant represents a vector consisting of elements with - /// the same value. - pub fn get_replicated_vector_pattern( - value: u128, - size: ScalarSize, - ) -> Option<(u64, ScalarSize)> { - let (mask, shift, next_size) = match size { - ScalarSize::Size8 => (u8::MAX as u128, 8, ScalarSize::Size128), - ScalarSize::Size16 => (u16::MAX as u128, 16, ScalarSize::Size8), - ScalarSize::Size32 => (u32::MAX as u128, 32, ScalarSize::Size16), - ScalarSize::Size64 => (u64::MAX as u128, 64, ScalarSize::Size32), - _ => return None, - }; - let mut r = None; - let v = value & mask; - - if (value >> shift) & mask == v { - r = Inst::get_replicated_vector_pattern(v, next_size); - - if r.is_none() { - r = Some((v as u64, size)); - } - } - - r - } - - /// Create instructions that load a vector constant consisting of elements with - /// the same value. 
- pub fn load_replicated_vector_pattern Writable>( - rd: Writable, - pattern: u64, - size: VectorSize, - mut alloc_tmp: F, - ) -> SmallVec<[Inst; 5]> { - let lane_size = size.lane_size(); - let widen_32_bit_pattern = |pattern, lane_size| { - if lane_size == ScalarSize::Size32 { - let pattern = pattern as u32 as u64; - - ASIMDMovModImm::maybe_from_u64(pattern | (pattern << 32), ScalarSize::Size64) - } else { - None - } - }; - - if let Some(imm) = ASIMDMovModImm::maybe_from_u64(pattern, lane_size) { - smallvec![Inst::VecDupImm { - rd, - imm, - invert: false, - size - }] - } else if let Some(imm) = ASIMDMovModImm::maybe_from_u64(!pattern, lane_size) { - debug_assert_ne!(lane_size, ScalarSize::Size8); - debug_assert_ne!(lane_size, ScalarSize::Size64); - - smallvec![Inst::VecDupImm { - rd, - imm, - invert: true, - size - }] - } else if let Some(imm) = widen_32_bit_pattern(pattern, lane_size) { - let mut insts = smallvec![]; - - // TODO: Implement support for 64-bit scalar MOVI; we zero-extend the - // lower 64 bits instead. - if !size.is_128bits() { - let tmp = alloc_tmp(types::I64X2); - insts.push(Inst::VecDupImm { - rd: tmp, - imm, - invert: false, - size: VectorSize::Size64x2, - }); - insts.push(Inst::FpuExtend { - rd, - rn: tmp.to_reg(), - size: ScalarSize::Size64, - }); - } else { - insts.push(Inst::VecDupImm { - rd, - imm, - invert: false, - size: VectorSize::Size64x2, - }); - } - - insts - } else if let Some(imm) = ASIMDFPModImm::maybe_from_u64(pattern, lane_size) { - smallvec![Inst::VecDupFPImm { rd, imm, size }] - } else { - let tmp = alloc_tmp(I64); - let mut insts = SmallVec::from(&Inst::load_constant(tmp, pattern, &mut alloc_tmp)[..]); - - insts.push(Inst::VecDup { - rd, - rn: tmp.to_reg(), - size, - }); - - insts - } - } - /// Generic constructor for a load (zero-extending where appropriate). pub fn gen_load(into_reg: Writable, mem: AMode, ty: Type, flags: MemFlags) -> Inst { match ty { @@ -585,6 +375,7 @@ fn memarg_operands VReg>(memarg: &AMode, collector: &mut OperandC &AMode::RegOffset { rn, .. } => { collector.reg_use(rn); } + &AMode::Const { .. } => {} } } @@ -928,9 +719,6 @@ fn aarch64_get_operands VReg>(inst: &Inst, collector: &mut Operan collector.reg_use(rt2); pairmemarg_operands(mem, collector); } - &Inst::LoadFpuConst64 { rd, .. } | &Inst::LoadFpuConst128 { rd, .. } => { - collector.reg_def(rd); - } &Inst::FpuToInt { rd, rn, .. } => { collector.reg_def(rd); collector.reg_use(rn); @@ -1318,7 +1106,7 @@ impl MachInst for Inst { // Pretty-printing of instructions. fn mem_finalize_for_show(mem: &AMode, state: &EmitState) -> (String, AMode) { - let (mem_insts, mem) = mem_finalize(0, mem, state); + let (mem_insts, mem) = mem_finalize(None, mem, state); let mut mem_str = mem_insts .into_iter() .map(|inst| { @@ -2007,18 +1795,6 @@ impl Inst { format!("stp {}, {}, {}", rt, rt2, mem) } - &Inst::LoadFpuConst64 { rd, const_data } => { - let rd = pretty_print_vreg_scalar(rd.to_reg(), ScalarSize::Size64, allocs); - format!( - "ldr {}, pc+8 ; b 12 ; data.f64 {}", - rd, - f64::from_bits(const_data) - ) - } - &Inst::LoadFpuConst128 { rd, const_data } => { - let rd = pretty_print_vreg_scalar(rd.to_reg(), ScalarSize::Size128, allocs); - format!("ldr {}, pc+8 ; b 20 ; data.f128 0x{:032x}", rd, const_data) - } &Inst::FpuToInt { op, rd, rn } => { let (op, sizesrc, sizedest) = match op { FpuToIntOp::F32ToI32 => ("fcvtzs", ScalarSize::Size32, OperandSize::Size32), @@ -2820,7 +2596,7 @@ impl Inst { // of the existing legalization framework). 
let rd = allocs.next_writable(rd); let mem = mem.with_allocs(allocs); - let (mem_insts, mem) = mem_finalize(0, &mem, state); + let (mem_insts, mem) = mem_finalize(None, &mem, state); let mut ret = String::new(); for inst in mem_insts.into_iter() { ret.push_str( diff --git a/cranelift/codegen/src/isa/aarch64/lower.isle b/cranelift/codegen/src/isa/aarch64/lower.isle index f3a3db7039..2b0d678f14 100644 --- a/cranelift/codegen/src/isa/aarch64/lower.isle +++ b/cranelift/codegen/src/isa/aarch64/lower.isle @@ -26,7 +26,7 @@ ;;;; Rules for `f32const` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; -(rule (lower (f32const (u64_from_ieee32 n))) +(rule (lower (f32const (u32_from_ieee32 n))) (constant_f32 n)) ;;;; Rules for `f64const` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; @@ -1954,7 +1954,7 @@ (rule -2 (lower (has_type ty (splat x @ (value_type (ty_scalar_float _))))) (vec_dup_from_fpu x (vector_size ty) 0)) -(rule (lower (has_type ty (splat (f32const (u64_from_ieee32 n))))) +(rule (lower (has_type ty (splat (f32const (u32_from_ieee32 n))))) (splat_const n (vector_size ty))) (rule (lower (has_type ty (splat (f64const (u64_from_ieee64 n))))) diff --git a/cranelift/codegen/src/isa/aarch64/lower.rs b/cranelift/codegen/src/isa/aarch64/lower.rs index d219451cfb..fcc38d0d59 100644 --- a/cranelift/codegen/src/isa/aarch64/lower.rs +++ b/cranelift/codegen/src/isa/aarch64/lower.rs @@ -570,67 +570,6 @@ pub(crate) fn lower_constant_u64(ctx: &mut Lower, rd: Writable, value } } -pub(crate) fn lower_constant_f32(ctx: &mut Lower, rd: Writable, value: f32) { - let alloc_tmp = |ty| ctx.alloc_tmp(ty).only_reg().unwrap(); - - for inst in Inst::load_fp_constant32(rd, value.to_bits(), alloc_tmp) { - ctx.emit(inst); - } -} - -pub(crate) fn lower_constant_f64(ctx: &mut Lower, rd: Writable, value: f64) { - let alloc_tmp = |ty| ctx.alloc_tmp(ty).only_reg().unwrap(); - - for inst in Inst::load_fp_constant64(rd, value.to_bits(), alloc_tmp) { - ctx.emit(inst); - } -} - -pub(crate) fn lower_constant_f128(ctx: &mut Lower, rd: Writable, value: u128) { - if value == 0 { - // Fast-track a common case. The general case, viz, calling `Inst::load_fp_constant128`, - // is potentially expensive. 
- ctx.emit(Inst::VecDupImm { - rd, - imm: ASIMDMovModImm::zero(ScalarSize::Size8), - invert: false, - size: VectorSize::Size8x16, - }); - } else { - let alloc_tmp = |ty| ctx.alloc_tmp(ty).only_reg().unwrap(); - for inst in Inst::load_fp_constant128(rd, value, alloc_tmp) { - ctx.emit(inst); - } - } -} - -pub(crate) fn lower_splat_const( - ctx: &mut Lower, - rd: Writable, - value: u64, - size: VectorSize, -) { - let (value, narrow_size) = match size.lane_size() { - ScalarSize::Size8 => (value as u8 as u64, ScalarSize::Size128), - ScalarSize::Size16 => (value as u16 as u64, ScalarSize::Size8), - ScalarSize::Size32 => (value as u32 as u64, ScalarSize::Size16), - ScalarSize::Size64 => (value, ScalarSize::Size32), - _ => unreachable!(), - }; - let (value, size) = match Inst::get_replicated_vector_pattern(value as u128, narrow_size) { - Some((value, lane_size)) => ( - value, - VectorSize::from_lane_size(lane_size, size.is_128bits()), - ), - None => (value, size), - }; - let alloc_tmp = |ty| ctx.alloc_tmp(ty).only_reg().unwrap(); - - for inst in Inst::load_replicated_vector_pattern(rd, value, size, alloc_tmp) { - ctx.emit(inst); - } -} - pub(crate) fn lower_condcode(cc: IntCC) -> Cond { match cc { IntCC::Equal => Cond::Eq, diff --git a/cranelift/codegen/src/isa/aarch64/lower/isle.rs b/cranelift/codegen/src/isa/aarch64/lower/isle.rs index 707243d2a3..d4545565d8 100644 --- a/cranelift/codegen/src/isa/aarch64/lower/isle.rs +++ b/cranelift/codegen/src/isa/aarch64/lower/isle.rs @@ -7,17 +7,16 @@ use smallvec::SmallVec; // Types that the generated ISLE code uses via `use super::*`. use super::{ - fp_reg, lower_condcode, lower_constant_f128, lower_constant_f32, lower_constant_f64, - lower_fp_condcode, stack_reg, writable_link_reg, writable_zero_reg, zero_reg, AMode, - ASIMDFPModImm, ASIMDMovModImm, BranchTarget, CallIndInfo, CallInfo, Cond, CondBrKind, ExtendOp, - FPUOpRI, FPUOpRIMod, FloatCC, Imm12, ImmLogic, ImmShift, Inst as MInst, IntCC, JTSequenceInfo, - MachLabel, MemLabel, MoveWideConst, MoveWideOp, NarrowValueMode, Opcode, OperandSize, - PairAMode, Reg, SImm9, ScalarSize, ShiftOpAndAmt, UImm12Scaled, UImm5, VecMisc2, VectorSize, - NZCV, + fp_reg, lower_condcode, lower_fp_condcode, stack_reg, writable_link_reg, writable_zero_reg, + zero_reg, AMode, ASIMDFPModImm, ASIMDMovModImm, BranchTarget, CallIndInfo, CallInfo, Cond, + CondBrKind, ExtendOp, FPUOpRI, FPUOpRIMod, FloatCC, Imm12, ImmLogic, ImmShift, Inst as MInst, + IntCC, JTSequenceInfo, MachLabel, MemLabel, MoveWideConst, MoveWideOp, NarrowValueMode, Opcode, + OperandSize, PairAMode, Reg, SImm9, ScalarSize, ShiftOpAndAmt, UImm12Scaled, UImm5, VecMisc2, + VectorSize, NZCV, }; use crate::ir::condcodes; use crate::isa::aarch64::inst::{FPULeftShiftImm, FPURightShiftImm}; -use crate::isa::aarch64::lower::{lower_address, lower_pair_address, lower_splat_const}; +use crate::isa::aarch64::lower::{lower_address, lower_pair_address}; use crate::isa::aarch64::AArch64Backend; use crate::machinst::valueregs; use crate::machinst::{isle::*, InputSourceInst}; @@ -524,38 +523,6 @@ impl Context for IsleContext<'_, '_, MInst, AArch64Backend> { lower_pair_address(self.lower_ctx, addr, offset as i32) } - fn constant_f32(&mut self, value: u64) -> Reg { - let rd = self.temp_writable_reg(I8X16); - - lower_constant_f32(self.lower_ctx, rd, f32::from_bits(value as u32)); - - rd.to_reg() - } - - fn constant_f64(&mut self, value: u64) -> Reg { - let rd = self.temp_writable_reg(I8X16); - - lower_constant_f64(self.lower_ctx, rd, f64::from_bits(value)); - - rd.to_reg() - } - - 
fn constant_f128(&mut self, value: u128) -> Reg { - let rd = self.temp_writable_reg(I8X16); - - lower_constant_f128(self.lower_ctx, rd, value); - - rd.to_reg() - } - - fn splat_const(&mut self, value: u64, size: &VectorSize) -> Reg { - let rd = self.temp_writable_reg(I8X16); - - lower_splat_const(self.lower_ctx, rd, value, *size); - - rd.to_reg() - } - fn fp_cond_code(&mut self, cc: &condcodes::FloatCC) -> Cond { lower_fp_condcode(*cc) } @@ -612,8 +579,6 @@ impl Context for IsleContext<'_, '_, MInst, AArch64Backend> { } fn min_fp_value(&mut self, signed: bool, in_bits: u8, out_bits: u8) -> Reg { - let tmp = self.lower_ctx.alloc_tmp(I8X16).only_reg().unwrap(); - if in_bits == 32 { // From float32. let min = match (signed, out_bits) { @@ -630,7 +595,7 @@ impl Context for IsleContext<'_, '_, MInst, AArch64Backend> { ), }; - lower_constant_f32(self.lower_ctx, tmp, min); + generated_code::constructor_constant_f32(self, min.to_bits()) } else if in_bits == 64 { // From float64. let min = match (signed, out_bits) { @@ -647,7 +612,7 @@ impl Context for IsleContext<'_, '_, MInst, AArch64Backend> { ), }; - lower_constant_f64(self.lower_ctx, tmp, min); + generated_code::constructor_constant_f64(self, min.to_bits()) } else { unimplemented!( "unexpected input size for min_fp_value: {} (signed: {}, output size: {})", @@ -656,13 +621,9 @@ impl Context for IsleContext<'_, '_, MInst, AArch64Backend> { out_bits ); } - - tmp.to_reg() } fn max_fp_value(&mut self, signed: bool, in_bits: u8, out_bits: u8) -> Reg { - let tmp = self.lower_ctx.alloc_tmp(I8X16).only_reg().unwrap(); - if in_bits == 32 { // From float32. let max = match (signed, out_bits) { @@ -682,7 +643,7 @@ impl Context for IsleContext<'_, '_, MInst, AArch64Backend> { ), }; - lower_constant_f32(self.lower_ctx, tmp, max); + generated_code::constructor_constant_f32(self, max.to_bits()) } else if in_bits == 64 { // From float64. 
let max = match (signed, out_bits) { @@ -702,7 +663,7 @@ impl Context for IsleContext<'_, '_, MInst, AArch64Backend> { ), }; - lower_constant_f64(self.lower_ctx, tmp, max); + generated_code::constructor_constant_f64(self, max.to_bits()) } else { unimplemented!( "unexpected input size for max_fp_value: {} (signed: {}, output size: {})", @@ -711,8 +672,6 @@ impl Context for IsleContext<'_, '_, MInst, AArch64Backend> { out_bits ); } - - tmp.to_reg() } fn fpu_op_ri_ushr(&mut self, ty_bits: u8, shift: u8) -> FPUOpRI { @@ -785,4 +744,66 @@ impl Context for IsleContext<'_, '_, MInst, AArch64Backend> { None } } + + fn asimd_mov_mod_imm_zero(&mut self, size: &ScalarSize) -> ASIMDMovModImm { + ASIMDMovModImm::zero(*size) + } + + fn asimd_mov_mod_imm_from_u64( + &mut self, + val: u64, + size: &ScalarSize, + ) -> Option { + ASIMDMovModImm::maybe_from_u64(val, *size) + } + + fn asimd_fp_mod_imm_from_u64(&mut self, val: u64, size: &ScalarSize) -> Option { + ASIMDFPModImm::maybe_from_u64(val, *size) + } + + fn u64_low32_bits_unset(&mut self, val: u64) -> Option { + if val & 0xffffffff == 0 { + Some(val) + } else { + None + } + } + + fn u128_replicated_u64(&mut self, val: u128) -> Option { + let low64 = val as u64 as u128; + if (low64 | (low64 << 64)) == val { + Some(low64 as u64) + } else { + None + } + } + + fn u64_replicated_u32(&mut self, val: u64) -> Option { + let low32 = val as u32 as u64; + if (low32 | (low32 << 32)) == val { + Some(low32) + } else { + None + } + } + + fn u32_replicated_u16(&mut self, val: u64) -> Option { + let val = val as u32; + let low16 = val as u16 as u32; + if (low16 | (low16 << 16)) == val { + Some(low16.into()) + } else { + None + } + } + + fn u16_replicated_u8(&mut self, val: u64) -> Option { + let val = val as u16; + let low8 = val as u8 as u16; + if (low8 | (low8 << 8)) == val { + Some(low8.into()) + } else { + None + } + } } diff --git a/cranelift/codegen/src/isa/riscv64/lower.isle b/cranelift/codegen/src/isa/riscv64/lower.isle index 2691129614..ec74f4555a 100644 --- a/cranelift/codegen/src/isa/riscv64/lower.isle +++ b/cranelift/codegen/src/isa/riscv64/lower.isle @@ -11,7 +11,7 @@ ;;;; Rules for `f32const` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; -(rule (lower (f32const (u64_from_ieee32 n))) +(rule (lower (f32const (u32_from_ieee32 n))) (imm $F32 n)) ;;;; Rules for `f64const` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; diff --git a/cranelift/codegen/src/isa/s390x/inst.isle b/cranelift/codegen/src/isa/s390x/inst.isle index ffc5f7350f..4330b33e01 100644 --- a/cranelift/codegen/src/isa/s390x/inst.isle +++ b/cranelift/codegen/src/isa/s390x/inst.isle @@ -896,7 +896,7 @@ (CallInd (link WritableReg) (info BoxCallIndInfo)) - + ;; A pseudo-instruction that captures register arguments in vregs. 
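Review note, ahead of the s390x and prelude hunks that follow: the backend-local `u64_as_u32` constructor is renamed to `u64_truncate_to_u32` so that the name `u64_as_u32` can be reused in the shared prelude as a fallible extractor (it matches only when the value fits in 32 bits), which `constant_f64` uses to defer to `constant_f32`. The two behave differently; a minimal sketch using the same signatures the patch introduces:

    use std::convert::TryFrom;

    /// s390x's old helper, now `u64_truncate_to_u32`: an infallible truncation.
    fn u64_truncate_to_u32(n: u64) -> u32 {
        n as u32
    }

    /// The new prelude extractor `u64_as_u32`: matches only values that fit in u32.
    fn u64_as_u32(n: u64) -> Option<u32> {
        u32::try_from(n).ok()
    }

    fn main() {
        assert_eq!(u64_truncate_to_u32(0x1_0000_00ff), 0xff); // silently drops high bits
        assert_eq!(u64_as_u32(0x1_0000_00ff), None);          // refuses to match instead
        assert_eq!(u64_as_u32(0xff), Some(0xff));
    }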
(Args (args VecArgPair)) @@ -1555,8 +1555,8 @@ (decl u8_as_u16 (u8) u16) (extern constructor u8_as_u16 u8_as_u16) -(decl u64_as_u32 (u64) u32) -(extern constructor u64_as_u32 u64_as_u32) +(decl u64_truncate_to_u32 (u64) u32) +(extern constructor u64_truncate_to_u32 u64_truncate_to_u32) (decl u64_as_i16 (u64) i16) (extern constructor u64_as_i16 u64_as_i16) @@ -3000,7 +3000,7 @@ ;; 32-bit result type, any value (rule 5 (imm (gpr32_ty ty) n) (let ((dst WritableReg (temp_writable_reg ty)) - (_ Unit (emit (MInst.Mov32Imm dst (u64_as_u32 n))))) + (_ Unit (emit (MInst.Mov32Imm dst (u64_truncate_to_u32 n))))) dst)) ;; 64-bit result type, value fits in i16 @@ -3051,7 +3051,7 @@ ;; TODO: use LZER to load 0.0 (rule 8 (imm $F32 n) (let ((dst WritableReg (temp_writable_reg $F32)) - (_ Unit (emit (MInst.LoadFpuConst32 dst (u64_as_u32 n))))) + (_ Unit (emit (MInst.LoadFpuConst32 dst (u64_truncate_to_u32 n))))) dst)) ;; 64-bit floating-point type, any value. Loaded from literal pool. diff --git a/cranelift/codegen/src/isa/s390x/lower.isle b/cranelift/codegen/src/isa/s390x/lower.isle index 02563f4d5b..965074fa69 100644 --- a/cranelift/codegen/src/isa/s390x/lower.isle +++ b/cranelift/codegen/src/isa/s390x/lower.isle @@ -18,7 +18,7 @@ ;;;; Rules for `f32const` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; -(rule (lower (f32const (u64_from_ieee32 x))) +(rule (lower (f32const (u32_from_ieee32 x))) (imm $F32 x)) diff --git a/cranelift/codegen/src/isa/s390x/lower/isle.rs b/cranelift/codegen/src/isa/s390x/lower/isle.rs index a6dfe21e51..7baf0f5cf5 100644 --- a/cranelift/codegen/src/isa/s390x/lower/isle.rs +++ b/cranelift/codegen/src/isa/s390x/lower/isle.rs @@ -436,7 +436,7 @@ impl generated_code::Context for IsleContext<'_, '_, MInst, S390xBackend> { } #[inline] - fn u64_as_u32(&mut self, n: u64) -> u32 { + fn u64_truncate_to_u32(&mut self, n: u64) -> u32 { n as u32 } diff --git a/cranelift/codegen/src/isa/x64/lower.isle b/cranelift/codegen/src/isa/x64/lower.isle index a19b6717fa..5b4773f6cb 100644 --- a/cranelift/codegen/src/isa/x64/lower.isle +++ b/cranelift/codegen/src/isa/x64/lower.isle @@ -24,7 +24,7 @@ ;;;; Rules for `f32const` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; -(rule (lower (f32const (u64_from_ieee32 x))) +(rule (lower (f32const (u32_from_ieee32 x))) (imm $F32 x)) ;;;; Rules for `f64const` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; diff --git a/cranelift/codegen/src/isle_prelude.rs b/cranelift/codegen/src/isle_prelude.rs index aa3714f141..9d78044b30 100644 --- a/cranelift/codegen/src/isle_prelude.rs +++ b/cranelift/codegen/src/isle_prelude.rs @@ -80,6 +80,11 @@ macro_rules! isle_common_prelude_methods { x ^ y } + #[inline] + fn u64_shl(&mut self, x: u64, y: u64) -> u64 { + x << y + } + #[inline] fn imm64_shl(&mut self, ty: Type, x: Imm64, y: Imm64) -> Imm64 { // Mask off any excess shift bits. @@ -502,8 +507,8 @@ macro_rules! isle_common_prelude_methods { } } - fn u64_from_ieee32(&mut self, val: Ieee32) -> u64 { - val.bits().into() + fn u32_from_ieee32(&mut self, val: Ieee32) -> u32 { + val.bits() } fn u64_from_ieee64(&mut self, val: Ieee64) -> u64 { @@ -748,5 +753,13 @@ macro_rules! 
isle_common_prelude_methods { fn pack_block_array_2(&mut self, a: BlockCall, b: BlockCall) -> BlockArray2 { [a, b] } + + fn u128_as_u64(&mut self, val: u128) -> Option { + u64::try_from(val).ok() + } + + fn u64_as_u32(&mut self, val: u64) -> Option { + u32::try_from(val).ok() + } }; } diff --git a/cranelift/codegen/src/prelude.isle b/cranelift/codegen/src/prelude.isle index 637ebd5e68..4815cf6357 100644 --- a/cranelift/codegen/src/prelude.isle +++ b/cranelift/codegen/src/prelude.isle @@ -88,10 +88,17 @@ (decl pure u32_as_u64 (u32) u64) (extern constructor u32_as_u64 u32_as_u64) +(convert u32 u64 u32_as_u64) (decl pure i64_as_u64 (i64) u64) (extern constructor i64_as_u64 i64_as_u64) +(decl u128_as_u64 (u64) u128) +(extern extractor u128_as_u64 u128_as_u64) + +(decl u64_as_u32 (u32) u64) +(extern extractor u64_as_u32 u64_as_u32) + ;;;; Primitive Arithmetic ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; (decl pure u8_and (u8 u8) u8) @@ -129,6 +136,9 @@ (decl pure u64_xor (u64 u64) u64) (extern constructor u64_xor u64_xor) +(decl pure u64_shl (u64 u64) u64) +(extern constructor u64_shl u64_shl) + (decl pure imm64_shl (Type Imm64 Imm64) Imm64) (extern constructor imm64_shl imm64_shl) @@ -388,8 +398,8 @@ (extern constructor imm64_masked imm64_masked) ;; Extract a `u64` from an `Ieee32`. -(decl u64_from_ieee32 (u64) Ieee32) -(extern extractor infallible u64_from_ieee32 u64_from_ieee32) +(decl u32_from_ieee32 (u32) Ieee32) +(extern extractor infallible u32_from_ieee32 u32_from_ieee32) ;; Extract a `u64` from an `Ieee64`. (decl u64_from_ieee64 (u64) Ieee64) diff --git a/cranelift/filetests/filetests/isa/aarch64/constants.clif b/cranelift/filetests/filetests/isa/aarch64/constants.clif index 53795f2ec1..01d6f5c172 100644 --- a/cranelift/filetests/filetests/isa/aarch64/constants.clif +++ b/cranelift/filetests/filetests/isa/aarch64/constants.clif @@ -356,14 +356,14 @@ block0: ; VCode: ; block0: -; movz x1, #16457, LSL #48 -; fmov d0, x1 +; movz x0, #16457, LSL #48 +; fmov d0, x0 ; ret ; ; Disassembled: ; block0: ; offset 0x0 -; mov x1, #0x4049000000000000 -; fmov d0, x1 +; mov x0, #0x4049000000000000 +; fmov d0, x0 ; ret function %f() -> f32 { @@ -374,14 +374,14 @@ block0: ; VCode: ; block0: -; movz x1, #16968, LSL #16 -; fmov s0, w1 +; movz w0, #16968, LSL #16 +; fmov s0, w0 ; ret ; ; Disassembled: ; block0: ; offset 0x0 -; mov x1, #0x42480000 -; fmov s0, w1 +; mov w0, #0x42480000 +; fmov s0, w0 ; ret function %f() -> f64 { diff --git a/cranelift/filetests/filetests/isa/aarch64/fcvt-small.clif b/cranelift/filetests/filetests/isa/aarch64/fcvt-small.clif index 2bd77df458..1074c034af 100644 --- a/cranelift/filetests/filetests/isa/aarch64/fcvt-small.clif +++ b/cranelift/filetests/filetests/isa/aarch64/fcvt-small.clif @@ -87,9 +87,9 @@ block0(v0: f32): ; fmov s4, #-1 ; fcmp s0, s4 ; b.gt 8 ; udf -; movz x9, #17280, LSL #16 -; fmov s17, w9 -; fcmp s0, s17 +; movz w8, #17280, LSL #16 +; fmov s18, w8 +; fcmp s0, s18 ; b.lt 8 ; udf ; fcvtzu w0, s0 ; ret @@ -103,9 +103,9 @@ block0(v0: f32): ; fcmp s0, s4 ; b.gt #0x1c ; .byte 0x1f, 0xc1, 0x00, 0x00 ; trap: int_ovf -; mov x9, #0x43800000 -; fmov s17, w9 -; fcmp s0, s17 +; mov w8, #0x43800000 +; fmov s18, w8 +; fcmp s0, s18 ; b.lt #0x30 ; .byte 0x1f, 0xc1, 0x00, 0x00 ; trap: int_ovf ; fcvtzu w0, s0 @@ -124,9 +124,9 @@ block0(v0: f64): ; fmov d4, #-1 ; fcmp d0, d4 ; b.gt 8 ; udf -; movz x9, #16496, LSL #48 -; fmov d17, x9 -; fcmp d0, d17 +; movz x8, #16496, LSL #48 +; fmov d18, x8 +; fcmp d0, d18 ; b.lt 8 ; udf ; fcvtzu w0, d0 ; ret @@ -140,9 +140,9 @@ 
block0(v0: f64): ; fcmp d0, d4 ; b.gt #0x1c ; .byte 0x1f, 0xc1, 0x00, 0x00 ; trap: int_ovf -; mov x9, #0x4070000000000000 -; fmov d17, x9 -; fcmp d0, d17 +; mov x8, #0x4070000000000000 +; fmov d18, x8 +; fcmp d0, d18 ; b.lt #0x30 ; .byte 0x1f, 0xc1, 0x00, 0x00 ; trap: int_ovf ; fcvtzu w0, d0 @@ -161,9 +161,9 @@ block0(v0: f32): ; fmov s4, #-1 ; fcmp s0, s4 ; b.gt 8 ; udf -; movz x9, #18304, LSL #16 -; fmov s17, w9 -; fcmp s0, s17 +; movz w8, #18304, LSL #16 +; fmov s18, w8 +; fcmp s0, s18 ; b.lt 8 ; udf ; fcvtzu w0, s0 ; ret @@ -177,9 +177,9 @@ block0(v0: f32): ; fcmp s0, s4 ; b.gt #0x1c ; .byte 0x1f, 0xc1, 0x00, 0x00 ; trap: int_ovf -; mov x9, #0x47800000 -; fmov s17, w9 -; fcmp s0, s17 +; mov w8, #0x47800000 +; fmov s18, w8 +; fcmp s0, s18 ; b.lt #0x30 ; .byte 0x1f, 0xc1, 0x00, 0x00 ; trap: int_ovf ; fcvtzu w0, s0 @@ -198,9 +198,9 @@ block0(v0: f64): ; fmov d4, #-1 ; fcmp d0, d4 ; b.gt 8 ; udf -; movz x9, #16624, LSL #48 -; fmov d17, x9 -; fcmp d0, d17 +; movz x8, #16624, LSL #48 +; fmov d18, x8 +; fcmp d0, d18 ; b.lt 8 ; udf ; fcvtzu w0, d0 ; ret @@ -214,9 +214,9 @@ block0(v0: f64): ; fcmp d0, d4 ; b.gt #0x1c ; .byte 0x1f, 0xc1, 0x00, 0x00 ; trap: int_ovf -; mov x9, #0x40f0000000000000 -; fmov d17, x9 -; fcmp d0, d17 +; mov x8, #0x40f0000000000000 +; fmov d18, x8 +; fcmp d0, d18 ; b.lt #0x30 ; .byte 0x1f, 0xc1, 0x00, 0x00 ; trap: int_ovf ; fcvtzu w0, d0 diff --git a/cranelift/filetests/filetests/isa/aarch64/fcvt.clif b/cranelift/filetests/filetests/isa/aarch64/fcvt.clif index 06ba98d8b5..c17f495cc6 100644 --- a/cranelift/filetests/filetests/isa/aarch64/fcvt.clif +++ b/cranelift/filetests/filetests/isa/aarch64/fcvt.clif @@ -241,9 +241,9 @@ block0(v0: f32): ; fmov s4, #-1 ; fcmp s0, s4 ; b.gt 8 ; udf -; movz x9, #20352, LSL #16 -; fmov s17, w9 -; fcmp s0, s17 +; movz w8, #20352, LSL #16 +; fmov s18, w8 +; fcmp s0, s18 ; b.lt 8 ; udf ; fcvtzu w0, s0 ; ret @@ -257,9 +257,9 @@ block0(v0: f32): ; fcmp s0, s4 ; b.gt #0x1c ; .byte 0x1f, 0xc1, 0x00, 0x00 ; trap: int_ovf -; mov x9, #0x4f800000 -; fmov s17, w9 -; fcmp s0, s17 +; mov w8, #0x4f800000 +; fmov s18, w8 +; fcmp s0, s18 ; b.lt #0x30 ; .byte 0x1f, 0xc1, 0x00, 0x00 ; trap: int_ovf ; fcvtzu w0, s0 @@ -278,9 +278,9 @@ block0(v0: f32): ; fmov s4, #-1 ; fcmp s0, s4 ; b.gt 8 ; udf -; movz x9, #24448, LSL #16 -; fmov s17, w9 -; fcmp s0, s17 +; movz w8, #24448, LSL #16 +; fmov s18, w8 +; fcmp s0, s18 ; b.lt 8 ; udf ; fcvtzu x0, s0 ; ret @@ -294,9 +294,9 @@ block0(v0: f32): ; fcmp s0, s4 ; b.gt #0x1c ; .byte 0x1f, 0xc1, 0x00, 0x00 ; trap: int_ovf -; mov x9, #0x5f800000 -; fmov s17, w9 -; fcmp s0, s17 +; mov w8, #0x5f800000 +; fmov s18, w8 +; fcmp s0, s18 ; b.lt #0x30 ; .byte 0x1f, 0xc1, 0x00, 0x00 ; trap: int_ovf ; fcvtzu x0, s0 @@ -315,9 +315,9 @@ block0(v0: f64): ; fmov d4, #-1 ; fcmp d0, d4 ; b.gt 8 ; udf -; movz x9, #16880, LSL #48 -; fmov d17, x9 -; fcmp d0, d17 +; movz x8, #16880, LSL #48 +; fmov d18, x8 +; fcmp d0, d18 ; b.lt 8 ; udf ; fcvtzu w0, d0 ; ret @@ -331,9 +331,9 @@ block0(v0: f64): ; fcmp d0, d4 ; b.gt #0x1c ; .byte 0x1f, 0xc1, 0x00, 0x00 ; trap: int_ovf -; mov x9, #0x41f0000000000000 -; fmov d17, x9 -; fcmp d0, d17 +; mov x8, #0x41f0000000000000 +; fmov d18, x8 +; fcmp d0, d18 ; b.lt #0x30 ; .byte 0x1f, 0xc1, 0x00, 0x00 ; trap: int_ovf ; fcvtzu w0, d0 @@ -352,9 +352,9 @@ block0(v0: f64): ; fmov d4, #-1 ; fcmp d0, d4 ; b.gt 8 ; udf -; movz x9, #17392, LSL #48 -; fmov d17, x9 -; fcmp d0, d17 +; movz x8, #17392, LSL #48 +; fmov d18, x8 +; fcmp d0, d18 ; b.lt 8 ; udf ; fcvtzu x0, d0 ; ret @@ -368,9 +368,9 @@ block0(v0: f64): ; fcmp 
d0, d4 ; b.gt #0x1c ; .byte 0x1f, 0xc1, 0x00, 0x00 ; trap: int_ovf -; mov x9, #0x43f0000000000000 -; fmov d17, x9 -; fcmp d0, d17 +; mov x8, #0x43f0000000000000 +; fmov d18, x8 +; fcmp d0, d18 ; b.lt #0x30 ; .byte 0x1f, 0xc1, 0x00, 0x00 ; trap: int_ovf ; fcvtzu x0, d0 @@ -450,13 +450,13 @@ block0(v0: f32): ; block0: ; fcmp s0, s0 ; b.vc 8 ; udf -; movz x5, #52992, LSL #16 -; fmov s5, w5 -; fcmp s0, s5 +; movz w4, #52992, LSL #16 +; fmov s6, w4 +; fcmp s0, s6 ; b.ge 8 ; udf -; movz x11, #20224, LSL #16 -; fmov s19, w11 -; fcmp s0, s19 +; movz w10, #20224, LSL #16 +; fmov s20, w10 +; fcmp s0, s20 ; b.lt 8 ; udf ; fcvtzs w0, s0 ; ret @@ -466,14 +466,14 @@ block0(v0: f32): ; fcmp s0, s0 ; b.vc #0xc ; .byte 0x1f, 0xc1, 0x00, 0x00 ; trap: bad_toint -; mov x5, #0xcf000000 -; fmov s5, w5 -; fcmp s0, s5 +; mov w4, #-0x31000000 +; fmov s6, w4 +; fcmp s0, s6 ; b.ge #0x20 ; .byte 0x1f, 0xc1, 0x00, 0x00 ; trap: int_ovf -; mov x11, #0x4f000000 -; fmov s19, w11 -; fcmp s0, s19 +; mov w10, #0x4f000000 +; fmov s20, w10 +; fcmp s0, s20 ; b.lt #0x34 ; .byte 0x1f, 0xc1, 0x00, 0x00 ; trap: int_ovf ; fcvtzs w0, s0 @@ -489,13 +489,13 @@ block0(v0: f32): ; block0: ; fcmp s0, s0 ; b.vc 8 ; udf -; movz x5, #57088, LSL #16 -; fmov s5, w5 -; fcmp s0, s5 +; movz w4, #57088, LSL #16 +; fmov s6, w4 +; fcmp s0, s6 ; b.ge 8 ; udf -; movz x11, #24320, LSL #16 -; fmov s19, w11 -; fcmp s0, s19 +; movz w10, #24320, LSL #16 +; fmov s20, w10 +; fcmp s0, s20 ; b.lt 8 ; udf ; fcvtzs x0, s0 ; ret @@ -505,14 +505,14 @@ block0(v0: f32): ; fcmp s0, s0 ; b.vc #0xc ; .byte 0x1f, 0xc1, 0x00, 0x00 ; trap: bad_toint -; mov x5, #0xdf000000 -; fmov s5, w5 -; fcmp s0, s5 +; mov w4, #-0x21000000 +; fmov s6, w4 +; fcmp s0, s6 ; b.ge #0x20 ; .byte 0x1f, 0xc1, 0x00, 0x00 ; trap: int_ovf -; mov x11, #0x5f000000 -; fmov s19, w11 -; fcmp s0, s19 +; mov w10, #0x5f000000 +; fmov s20, w10 +; fcmp s0, s20 ; b.lt #0x34 ; .byte 0x1f, 0xc1, 0x00, 0x00 ; trap: int_ovf ; fcvtzs x0, s0 @@ -528,12 +528,12 @@ block0(v0: f64): ; block0: ; fcmp d0, d0 ; b.vc 8 ; udf -; ldr d4, pc+8 ; b 12 ; data.f64 -2147483649 +; ldr d4, [const(0)] ; fcmp d0, d4 ; b.gt 8 ; udf -; movz x9, #16864, LSL #48 -; fmov d17, x9 -; fcmp d0, d17 +; movz x8, #16864, LSL #48 +; fmov d18, x8 +; fcmp d0, d18 ; b.lt 8 ; udf ; fcvtzs w0, d0 ; ret @@ -543,20 +543,19 @@ block0(v0: f64): ; fcmp d0, d0 ; b.vc #0xc ; .byte 0x1f, 0xc1, 0x00, 0x00 ; trap: bad_toint -; ldr d4, #0x14 -; b #0x1c -; .byte 0x00, 0x00, 0x20, 0x00 -; .byte 0x00, 0x00, 0xe0, 0xc1 +; ldr d4, #0x38 ; fcmp d0, d4 -; b.gt #0x28 +; b.gt #0x1c ; .byte 0x1f, 0xc1, 0x00, 0x00 ; trap: int_ovf -; mov x9, #0x41e0000000000000 -; fmov d17, x9 -; fcmp d0, d17 -; b.lt #0x3c +; mov x8, #0x41e0000000000000 +; fmov d18, x8 +; fcmp d0, d18 +; b.lt #0x30 ; .byte 0x1f, 0xc1, 0x00, 0x00 ; trap: int_ovf ; fcvtzs w0, d0 ; ret +; .byte 0x00, 0x00, 0x20, 0x00 +; .byte 0x00, 0x00, 0xe0, 0xc1 function %f24(f64) -> i64 { block0(v0: f64): @@ -568,13 +567,13 @@ block0(v0: f64): ; block0: ; fcmp d0, d0 ; b.vc 8 ; udf -; movz x5, #50144, LSL #48 -; fmov d5, x5 -; fcmp d0, d5 +; movz x4, #50144, LSL #48 +; fmov d6, x4 +; fcmp d0, d6 ; b.ge 8 ; udf -; movz x11, #17376, LSL #48 -; fmov d19, x11 -; fcmp d0, d19 +; movz x10, #17376, LSL #48 +; fmov d20, x10 +; fcmp d0, d20 ; b.lt 8 ; udf ; fcvtzs x0, d0 ; ret @@ -584,14 +583,14 @@ block0(v0: f64): ; fcmp d0, d0 ; b.vc #0xc ; .byte 0x1f, 0xc1, 0x00, 0x00 ; trap: bad_toint -; mov x5, #-0x3c20000000000000 -; fmov d5, x5 -; fcmp d0, d5 +; mov x4, #-0x3c20000000000000 +; fmov d6, x4 +; fcmp d0, d6 ; b.ge #0x20 ; 
.byte 0x1f, 0xc1, 0x00, 0x00 ; trap: int_ovf -; mov x11, #0x43e0000000000000 -; fmov d19, x11 -; fcmp d0, d19 +; mov x10, #0x43e0000000000000 +; fmov d20, x10 +; fcmp d0, d20 ; b.lt #0x34 ; .byte 0x1f, 0xc1, 0x00, 0x00 ; trap: int_ovf ; fcvtzs x0, d0 diff --git a/cranelift/filetests/filetests/isa/aarch64/floating-point.clif b/cranelift/filetests/filetests/isa/aarch64/floating-point.clif index 3ec6179544..186b06ad90 100644 --- a/cranelift/filetests/filetests/isa/aarch64/floating-point.clif +++ b/cranelift/filetests/filetests/isa/aarch64/floating-point.clif @@ -531,9 +531,9 @@ block0(v0: f32): ; fmov s4, #-1 ; fcmp s0, s4 ; b.gt 8 ; udf -; movz x9, #20352, LSL #16 -; fmov s17, w9 -; fcmp s0, s17 +; movz w8, #20352, LSL #16 +; fmov s18, w8 +; fcmp s0, s18 ; b.lt 8 ; udf ; fcvtzu w0, s0 ; ret @@ -547,9 +547,9 @@ block0(v0: f32): ; fcmp s0, s4 ; b.gt #0x1c ; .byte 0x1f, 0xc1, 0x00, 0x00 ; trap: int_ovf -; mov x9, #0x4f800000 -; fmov s17, w9 -; fcmp s0, s17 +; mov w8, #0x4f800000 +; fmov s18, w8 +; fcmp s0, s18 ; b.lt #0x30 ; .byte 0x1f, 0xc1, 0x00, 0x00 ; trap: int_ovf ; fcvtzu w0, s0 @@ -565,13 +565,13 @@ block0(v0: f32): ; block0: ; fcmp s0, s0 ; b.vc 8 ; udf -; movz x5, #52992, LSL #16 -; fmov s5, w5 -; fcmp s0, s5 +; movz w4, #52992, LSL #16 +; fmov s6, w4 +; fcmp s0, s6 ; b.ge 8 ; udf -; movz x11, #20224, LSL #16 -; fmov s19, w11 -; fcmp s0, s19 +; movz w10, #20224, LSL #16 +; fmov s20, w10 +; fcmp s0, s20 ; b.lt 8 ; udf ; fcvtzs w0, s0 ; ret @@ -581,14 +581,14 @@ block0(v0: f32): ; fcmp s0, s0 ; b.vc #0xc ; .byte 0x1f, 0xc1, 0x00, 0x00 ; trap: bad_toint -; mov x5, #0xcf000000 -; fmov s5, w5 -; fcmp s0, s5 +; mov w4, #-0x31000000 +; fmov s6, w4 +; fcmp s0, s6 ; b.ge #0x20 ; .byte 0x1f, 0xc1, 0x00, 0x00 ; trap: int_ovf -; mov x11, #0x4f000000 -; fmov s19, w11 -; fcmp s0, s19 +; mov w10, #0x4f000000 +; fmov s20, w10 +; fcmp s0, s20 ; b.lt #0x34 ; .byte 0x1f, 0xc1, 0x00, 0x00 ; trap: int_ovf ; fcvtzs w0, s0 @@ -607,9 +607,9 @@ block0(v0: f32): ; fmov s4, #-1 ; fcmp s0, s4 ; b.gt 8 ; udf -; movz x9, #24448, LSL #16 -; fmov s17, w9 -; fcmp s0, s17 +; movz w8, #24448, LSL #16 +; fmov s18, w8 +; fcmp s0, s18 ; b.lt 8 ; udf ; fcvtzu x0, s0 ; ret @@ -623,9 +623,9 @@ block0(v0: f32): ; fcmp s0, s4 ; b.gt #0x1c ; .byte 0x1f, 0xc1, 0x00, 0x00 ; trap: int_ovf -; mov x9, #0x5f800000 -; fmov s17, w9 -; fcmp s0, s17 +; mov w8, #0x5f800000 +; fmov s18, w8 +; fcmp s0, s18 ; b.lt #0x30 ; .byte 0x1f, 0xc1, 0x00, 0x00 ; trap: int_ovf ; fcvtzu x0, s0 @@ -641,13 +641,13 @@ block0(v0: f32): ; block0: ; fcmp s0, s0 ; b.vc 8 ; udf -; movz x5, #57088, LSL #16 -; fmov s5, w5 -; fcmp s0, s5 +; movz w4, #57088, LSL #16 +; fmov s6, w4 +; fcmp s0, s6 ; b.ge 8 ; udf -; movz x11, #24320, LSL #16 -; fmov s19, w11 -; fcmp s0, s19 +; movz w10, #24320, LSL #16 +; fmov s20, w10 +; fcmp s0, s20 ; b.lt 8 ; udf ; fcvtzs x0, s0 ; ret @@ -657,14 +657,14 @@ block0(v0: f32): ; fcmp s0, s0 ; b.vc #0xc ; .byte 0x1f, 0xc1, 0x00, 0x00 ; trap: bad_toint -; mov x5, #0xdf000000 -; fmov s5, w5 -; fcmp s0, s5 +; mov w4, #-0x21000000 +; fmov s6, w4 +; fcmp s0, s6 ; b.ge #0x20 ; .byte 0x1f, 0xc1, 0x00, 0x00 ; trap: int_ovf -; mov x11, #0x5f000000 -; fmov s19, w11 -; fcmp s0, s19 +; mov w10, #0x5f000000 +; fmov s20, w10 +; fcmp s0, s20 ; b.lt #0x34 ; .byte 0x1f, 0xc1, 0x00, 0x00 ; trap: int_ovf ; fcvtzs x0, s0 @@ -683,9 +683,9 @@ block0(v0: f64): ; fmov d4, #-1 ; fcmp d0, d4 ; b.gt 8 ; udf -; movz x9, #16880, LSL #48 -; fmov d17, x9 -; fcmp d0, d17 +; movz x8, #16880, LSL #48 +; fmov d18, x8 +; fcmp d0, d18 ; b.lt 8 ; udf ; fcvtzu w0, d0 ; ret @@ 
-699,9 +699,9 @@ block0(v0: f64):
 ; fcmp d0, d4
 ; b.gt #0x1c
 ; .byte 0x1f, 0xc1, 0x00, 0x00 ; trap: int_ovf
-; mov x9, #0x41f0000000000000
-; fmov d17, x9
-; fcmp d0, d17
+; mov x8, #0x41f0000000000000
+; fmov d18, x8
+; fcmp d0, d18
 ; b.lt #0x30
 ; .byte 0x1f, 0xc1, 0x00, 0x00 ; trap: int_ovf
 ; fcvtzu w0, d0
@@ -717,12 +717,12 @@ block0(v0: f64):
 ; block0:
 ; fcmp d0, d0
 ; b.vc 8 ; udf
-; ldr d4, pc+8 ; b 12 ; data.f64 -2147483649
+; ldr d4, [const(0)]
 ; fcmp d0, d4
 ; b.gt 8 ; udf
-; movz x9, #16864, LSL #48
-; fmov d17, x9
-; fcmp d0, d17
+; movz x8, #16864, LSL #48
+; fmov d18, x8
+; fcmp d0, d18
 ; b.lt 8 ; udf
 ; fcvtzs w0, d0
 ; ret
@@ -732,20 +732,19 @@ block0(v0: f64):
 ; fcmp d0, d0
 ; b.vc #0xc
 ; .byte 0x1f, 0xc1, 0x00, 0x00 ; trap: bad_toint
-; ldr d4, #0x14
-; b #0x1c
-; .byte 0x00, 0x00, 0x20, 0x00
-; .byte 0x00, 0x00, 0xe0, 0xc1
+; ldr d4, #0x38
 ; fcmp d0, d4
-; b.gt #0x28
+; b.gt #0x1c
 ; .byte 0x1f, 0xc1, 0x00, 0x00 ; trap: int_ovf
-; mov x9, #0x41e0000000000000
-; fmov d17, x9
-; fcmp d0, d17
-; b.lt #0x3c
+; mov x8, #0x41e0000000000000
+; fmov d18, x8
+; fcmp d0, d18
+; b.lt #0x30
 ; .byte 0x1f, 0xc1, 0x00, 0x00 ; trap: int_ovf
 ; fcvtzs w0, d0
 ; ret
+; .byte 0x00, 0x00, 0x20, 0x00
+; .byte 0x00, 0x00, 0xe0, 0xc1
 
 function %f39(f64) -> i64 {
 block0(v0: f64):
@@ -760,9 +759,9 @@ block0(v0: f64):
 ; fmov d4, #-1
 ; fcmp d0, d4
 ; b.gt 8 ; udf
-; movz x9, #17392, LSL #48
-; fmov d17, x9
-; fcmp d0, d17
+; movz x8, #17392, LSL #48
+; fmov d18, x8
+; fcmp d0, d18
 ; b.lt 8 ; udf
 ; fcvtzu x0, d0
 ; ret
@@ -776,9 +775,9 @@ block0(v0: f64):
 ; fcmp d0, d4
 ; b.gt #0x1c
 ; .byte 0x1f, 0xc1, 0x00, 0x00 ; trap: int_ovf
-; mov x9, #0x43f0000000000000
-; fmov d17, x9
-; fcmp d0, d17
+; mov x8, #0x43f0000000000000
+; fmov d18, x8
+; fcmp d0, d18
 ; b.lt #0x30
 ; .byte 0x1f, 0xc1, 0x00, 0x00 ; trap: int_ovf
 ; fcvtzu x0, d0
@@ -794,13 +793,13 @@ block0(v0: f64):
 ; block0:
 ; fcmp d0, d0
 ; b.vc 8 ; udf
-; movz x5, #50144, LSL #48
-; fmov d5, x5
-; fcmp d0, d5
+; movz x4, #50144, LSL #48
+; fmov d6, x4
+; fcmp d0, d6
 ; b.ge 8 ; udf
-; movz x11, #17376, LSL #48
-; fmov d19, x11
-; fcmp d0, d19
+; movz x10, #17376, LSL #48
+; fmov d20, x10
+; fcmp d0, d20
 ; b.lt 8 ; udf
 ; fcvtzs x0, d0
 ; ret
@@ -810,14 +809,14 @@ block0(v0: f64):
 ; fcmp d0, d0
 ; b.vc #0xc
 ; .byte 0x1f, 0xc1, 0x00, 0x00 ; trap: bad_toint
-; mov x5, #-0x3c20000000000000
-; fmov d5, x5
-; fcmp d0, d5
+; mov x4, #-0x3c20000000000000
+; fmov d6, x4
+; fcmp d0, d6
 ; b.ge #0x20
 ; .byte 0x1f, 0xc1, 0x00, 0x00 ; trap: int_ovf
-; mov x11, #0x43e0000000000000
-; fmov d19, x11
-; fcmp d0, d19
+; mov x10, #0x43e0000000000000
+; fmov d20, x10
+; fcmp d0, d20
 ; b.lt #0x34
 ; .byte 0x1f, 0xc1, 0x00, 0x00 ; trap: int_ovf
 ; fcvtzs x0, d0
diff --git a/cranelift/filetests/filetests/isa/aarch64/shuffle.clif b/cranelift/filetests/filetests/isa/aarch64/shuffle.clif
index c52959b886..c9020f0cc1 100644
--- a/cranelift/filetests/filetests/isa/aarch64/shuffle.clif
+++ b/cranelift/filetests/filetests/isa/aarch64/shuffle.clif
@@ -12,7 +12,7 @@ block0(v0: i8x16, v1: i8x16):
 ; block0:
 ; mov v30.16b, v0.16b
 ; mov v31.16b, v1.16b
-; ldr q3, pc+8 ; b 20 ; data.f128 0x05110f0204180d170b0c06041a1f0003
+; ldr q3, [const(0)]
 ; tbl v0.16b, { v30.16b, v31.16b }, v3.16b
 ; ret
 ;
@@ -20,14 +20,16 @@ block0(v0: i8x16, v1: i8x16):
 ; block0: ; offset 0x0
 ; mov v30.16b, v0.16b
 ; mov v31.16b, v1.16b
-; ldr q3, #0x10
-; b #0x20
+; ldr q3, #0x20
+; tbl v0.16b, {v30.16b, v31.16b}, v3.16b
+; ret
+; .byte 0x00, 0x00, 0x00, 0x00
+; .byte 0x00, 0x00, 0x00, 0x00
+; .byte 0x00, 0x00, 0x00, 0x00
 ; adc w3, w0, wzr
 ; add w4, w16, w12, lsl #1
 ; orr z23.b, p3/m, z23.b, z8.b
 ; mov z2.b, p1/z, #0x78
-; tbl v0.16b, {v30.16b, v31.16b}, v3.16b
-; ret
 
 function %aarch64_uzp1_i8x16(i8x16, i8x16) -> i8x16 {
 block0(v0: i8x16, v1: i8x16):
@@ -541,7 +543,7 @@ block0(v0: i8x16, v1: i8x16):
 ; block0:
 ; mov v30.16b, v0.16b
 ; mov v31.16b, v1.16b
-; ldr q3, pc+8 ; b 20 ; data.f128 0x1f1e1d1c1b1a19181716151413121110
+; ldr q3, [const(0)]
 ; tbl v0.16b, { v30.16b, v31.16b }, v3.16b
 ; ret
 ;
@@ -549,14 +551,16 @@ block0(v0: i8x16, v1: i8x16):
 ; block0: ; offset 0x0
 ; mov v30.16b, v0.16b
 ; mov v31.16b, v1.16b
-; ldr q3, #0x10
-; b #0x20
-; sbfiz w16, w8, #0xe, #5
-; b #0xfffffffffc585464
-; madd w24, w8, w26, w6
-; fmadd s28, s8, s30, s7
+; ldr q3, #0x20
 ; tbl v0.16b, {v30.16b, v31.16b}, v3.16b
 ; ret
+; .byte 0x00, 0x00, 0x00, 0x00
+; .byte 0x00, 0x00, 0x00, 0x00
+; .byte 0x00, 0x00, 0x00, 0x00
+; sbfiz w16, w8, #0xe, #5
+; b #0xfffffffffc585474
+; madd w24, w8, w26, w6
+; fmadd s28, s8, s30, s7
 
 function %aarch64_dup_i8x16(i8x16, i8x16) -> i8x16 {
 block0(v0: i8x16, v1: i8x16):
diff --git a/cranelift/filetests/filetests/isa/aarch64/simd-arithmetic.clif b/cranelift/filetests/filetests/isa/aarch64/simd-arithmetic.clif
index 3487163f41..5a05e8925b 100644
--- a/cranelift/filetests/filetests/isa/aarch64/simd-arithmetic.clif
+++ b/cranelift/filetests/filetests/isa/aarch64/simd-arithmetic.clif
@@ -106,10 +106,10 @@ block0(v0: i64x2, v1: i64x2):
 ; VCode:
 ; block0:
-; movz x4, #1
-; dup v4.2d, x4
+; movz x3, #1
+; dup v5.2d, x3
 ; orr v7.16b, v0.16b, v1.16b
-; and v17.16b, v7.16b, v4.16b
+; and v17.16b, v7.16b, v5.16b
 ; ushr v19.2d, v0.2d, #1
 ; ushr v21.2d, v1.2d, #1
 ; add v23.2d, v19.2d, v21.2d
@@ -118,10 +118,10 @@ block0(v0: i64x2, v1: i64x2):
 ;
 ; Disassembled:
 ; block0: ; offset 0x0
-; mov x4, #1
-; dup v4.2d, x4
+; mov x3, #1
+; dup v5.2d, x3
 ; orr v7.16b, v0.16b, v1.16b
-; and v17.16b, v7.16b, v4.16b
+; and v17.16b, v7.16b, v5.16b
 ; ushr v19.2d, v0.2d, #1
 ; ushr v21.2d, v1.2d, #1
 ; add v23.2d, v19.2d, v21.2d
diff --git a/cranelift/filetests/filetests/isa/aarch64/simd-bitwise-compile.clif b/cranelift/filetests/filetests/isa/aarch64/simd-bitwise-compile.clif
index b4449a9670..9b7b851128 100644
--- a/cranelift/filetests/filetests/isa/aarch64/simd-bitwise-compile.clif
+++ b/cranelift/filetests/filetests/isa/aarch64/simd-bitwise-compile.clif
@@ -228,7 +228,7 @@ block0(v0: i32):
 ; VCode:
 ; block0:
-; ldr q5, pc+8 ; b 20 ; data.f128 0x0f0e0d0c0b0a09080706050403020100
+; ldr q5, [const(0)]
 ; and w3, w0, #7
 ; dup v6.16b, w3
 ; sshl v0.16b, v5.16b, v6.16b
@@ -236,16 +236,18 @@ block0(v0: i32):
 ;
 ; Disassembled:
 ; block0: ; offset 0x0
-; ldr q5, #8
-; b #0x18
-; .byte 0x00, 0x01, 0x02, 0x03
-; .byte 0x04, 0x05, 0x06, 0x07
-; add w8, w8, w10, lsl #2
-; .byte 0x0c, 0x0d, 0x0e, 0x0f
+; ldr q5, #0x20
 ; and w3, w0, #7
 ; dup v6.16b, w3
 ; sshl v0.16b, v5.16b, v6.16b
 ; ret
+; .byte 0x00, 0x00, 0x00, 0x00
+; .byte 0x00, 0x00, 0x00, 0x00
+; .byte 0x00, 0x00, 0x00, 0x00
+; .byte 0x00, 0x01, 0x02, 0x03
+; .byte 0x04, 0x05, 0x06, 0x07
+; add w8, w8, w10, lsl #2
+; .byte 0x0c, 0x0d, 0x0e, 0x0f
 
 function %ushr_i8x16_imm() -> i8x16 {
 block0:
@@ -257,7 +259,7 @@ block0:
 ; VCode:
 ; block0:
-; ldr q5, pc+8 ; b 20 ; data.f128 0x0f0e0d0c0b0a09080706050403020100
+; ldr q5, [const(0)]
 ; movz w1, #1
 ; and w3, w1, #7
 ; sub x5, xzr, x3
@@ -267,18 +269,18 @@ block0:
 ;
 ; Disassembled:
 ; block0: ; offset 0x0
-; ldr q5, #8
-; b #0x18
-; .byte 0x00, 0x01, 0x02, 0x03
-; .byte 0x04, 0x05, 0x06, 0x07
-; add w8, w8, w10, lsl #2
-; .byte 0x0c, 0x0d, 0x0e, 0x0f
+; ldr q5, #0x20
 ; mov w1, #1
 ; and w3, w1, #7
 ; neg x5, x3
 ; dup v7.16b, w5
 ; ushl v0.16b, v5.16b, v7.16b
 ; ret
+; .byte 0x00, 0x00, 0x00, 0x00
+; .byte 0x00, 0x01, 0x02, 0x03
+; .byte 0x04, 0x05, 0x06, 0x07
+; add w8, w8, w10, lsl #2
+; .byte 0x0c, 0x0d, 0x0e, 0x0f
 
 function %sshr_i8x16(i32) -> i8x16 {
 block0(v0: i32):
@@ -289,7 +291,7 @@ block0(v0: i32):
 ; VCode:
 ; block0:
-; ldr q6, pc+8 ; b 20 ; data.f128 0x0f0e0d0c0b0a09080706050403020100
+; ldr q6, [const(0)]
 ; and w3, w0, #7
 ; sub x5, xzr, x3
 ; dup v7.16b, w5
@@ -298,17 +300,18 @@ block0(v0: i32):
 ;
 ; Disassembled:
 ; block0: ; offset 0x0
-; ldr q6, #8
-; b #0x18
-; .byte 0x00, 0x01, 0x02, 0x03
-; .byte 0x04, 0x05, 0x06, 0x07
-; add w8, w8, w10, lsl #2
-; .byte 0x0c, 0x0d, 0x0e, 0x0f
+; ldr q6, #0x20
 ; and w3, w0, #7
 ; neg x5, x3
 ; dup v7.16b, w5
 ; sshl v0.16b, v6.16b, v7.16b
 ; ret
+; .byte 0x00, 0x00, 0x00, 0x00
+; .byte 0x00, 0x00, 0x00, 0x00
+; .byte 0x00, 0x01, 0x02, 0x03
+; .byte 0x04, 0x05, 0x06, 0x07
+; add w8, w8, w10, lsl #2
+; .byte 0x0c, 0x0d, 0x0e, 0x0f
 
 function %sshr_i8x16_imm(i8x16, i32) -> i8x16 {
 block0(v0: i8x16, v1: i32):
diff --git a/cranelift/filetests/filetests/isa/aarch64/simd-lane-access-compile.clif b/cranelift/filetests/filetests/isa/aarch64/simd-lane-access-compile.clif
index af539f84ba..3b19f5d0e1 100644
--- a/cranelift/filetests/filetests/isa/aarch64/simd-lane-access-compile.clif
+++ b/cranelift/filetests/filetests/isa/aarch64/simd-lane-access-compile.clif
@@ -15,25 +15,26 @@ block0:
 ; VCode:
 ; block0:
 ; movi v30.16b, #0
-; movz x4, #1
-; fmov s31, w4
-; ldr q3, pc+8 ; b 20 ; data.f128 0x11000000000000000000000000000000
+; movz w3, #1
+; fmov s31, w3
+; ldr q3, [const(0)]
 ; tbl v0.16b, { v30.16b, v31.16b }, v3.16b
 ; ret
 ;
 ; Disassembled:
 ; block0: ; offset 0x0
 ; movi v30.16b, #0
-; mov x4, #1
-; fmov s31, w4
-; ldr q3, #0x14
-; b #0x24
+; mov w3, #1
+; fmov s31, w3
+; ldr q3, #0x20
+; tbl v0.16b, {v30.16b, v31.16b}, v3.16b
+; ret
+; .byte 0x00, 0x00, 0x00, 0x00
+; .byte 0x00, 0x00, 0x00, 0x00
 ; .byte 0x00, 0x00, 0x00, 0x00
 ; .byte 0x00, 0x00, 0x00, 0x00
 ; .byte 0x00, 0x00, 0x00, 0x00
 ; add w0, w0, #0
-; tbl v0.16b, {v30.16b, v31.16b}, v3.16b
-; ret
 
 function %shuffle_same_ssa_value() -> i8x16 {
 block0:
@@ -44,26 +45,27 @@ block0:
 ; VCode:
 ; block0:
-; movz x3, #1
-; fmov s31, w3
-; ldr q2, pc+8 ; b 20 ; data.f128 0x13000000000000000000000000000000
+; movz w2, #1
+; fmov s31, w2
+; ldr q2, [const(0)]
 ; mov v30.16b, v31.16b
 ; tbl v0.16b, { v30.16b, v31.16b }, v2.16b
 ; ret
 ;
 ; Disassembled:
 ; block0: ; offset 0x0
-; mov x3, #1
-; fmov s31, w3
-; ldr q2, #0x10
-; b #0x20
+; mov w2, #1
+; fmov s31, w2
+; ldr q2, #0x20
+; mov v30.16b, v31.16b
+; tbl v0.16b, {v30.16b, v31.16b}, v2.16b
+; ret
+; .byte 0x00, 0x00, 0x00, 0x00
+; .byte 0x00, 0x00, 0x00, 0x00
 ; .byte 0x00, 0x00, 0x00, 0x00
 ; .byte 0x00, 0x00, 0x00, 0x00
 ; .byte 0x00, 0x00, 0x00, 0x00
 ; sbfx w0, w0, #0, #1
-; mov v30.16b, v31.16b
-; tbl v0.16b, {v30.16b, v31.16b}, v2.16b
-; ret
 
 function %swizzle() -> i8x16 {
 block0:
@@ -75,27 +77,25 @@ block0:
 ; VCode:
 ; block0:
-; ldr q2, pc+8 ; b 20 ; data.f128 0x0f0e0d0c0b0a09080706050403020100
-; ldr q3, pc+8 ; b 20 ; data.f128 0x0f0e0d0c0b0a09080706050403020100
+; ldr q2, [const(1)]
+; ldr q3, [const(0)]
 ; tbl v0.16b, { v2.16b }, v3.16b
 ; ret
 ;
 ; Disassembled:
 ; block0: ; offset 0x0
-; ldr q2, #8
-; b #0x18
-; .byte 0x00, 0x01, 0x02, 0x03
-; .byte 0x04, 0x05, 0x06, 0x07
-; add w8, w8, w10, lsl #2
-; .byte 0x0c, 0x0d, 0x0e, 0x0f
-; ldr q3, #0x20
-; b #0x30
-; .byte 0x00, 0x01, 0x02, 0x03
-; .byte 0x04, 0x05, 0x06, 0x07
-; add w8, w8, w10, lsl #2
-; .byte 0x0c, 0x0d, 0x0e, 0x0f
+; ldr q2, #0x20
+; ldr q3, #0x10
 ; tbl v0.16b, {v2.16b}, v3.16b
 ; ret
+; .byte 0x00, 0x01, 0x02, 0x03
+; .byte 0x04, 0x05, 0x06, 0x07
+; add w8, w8, w10, lsl #2
+; .byte 0x0c, 0x0d, 0x0e, 0x0f
+; .byte 0x00, 0x01, 0x02, 0x03
+; .byte 0x04, 0x05, 0x06, 0x07
+; add w8, w8, w10, lsl #2
+; .byte 0x0c, 0x0d, 0x0e, 0x0f
 
 function %splat_i8(i8) -> i8x16 {
 block0(v0: i8):
diff --git a/cranelift/filetests/filetests/isa/aarch64/simd.clif b/cranelift/filetests/filetests/isa/aarch64/simd.clif
index 4933878ac3..47e1f23f85 100644
--- a/cranelift/filetests/filetests/isa/aarch64/simd.clif
+++ b/cranelift/filetests/filetests/isa/aarch64/simd.clif
@@ -33,14 +33,14 @@ block0:
 ; VCode:
 ; block0:
-; movz x1, #42679
-; dup v0.8h, w1
+; movz x0, #42679
+; dup v0.8h, w0
 ; ret
 ;
 ; Disassembled:
 ; block0: ; offset 0x0
-; mov x1, #0xa6b7
-; dup v0.8h, w1
+; mov x0, #0xa6b7
+; dup v0.8h, w0
 ; ret
 
 function %f4(i32, i8x16, i8x16) -> i8x16 {
@@ -156,14 +156,14 @@ block0:
 ; VCode:
 ; block0:
-; movi v1.2d, #18374687579166474495
-; fmov d0, d1
+; movi v0.2d, #18374687579166474495
+; fmov d0, d0
 ; ret
 ;
 ; Disassembled:
 ; block0: ; offset 0x0
-; movi v1.2d, #0xff0000ffff0000ff
-; fmov d0, d1
+; movi v0.2d, #0xff0000ffff0000ff
+; fmov d0, d0
 ; ret
 
 function %f10() -> i32x4 {
diff --git a/cranelift/filetests/filetests/isa/aarch64/vhigh_bits.clif b/cranelift/filetests/filetests/isa/aarch64/vhigh_bits.clif
index e0034049af..6a36c90191 100644
--- a/cranelift/filetests/filetests/isa/aarch64/vhigh_bits.clif
+++ b/cranelift/filetests/filetests/isa/aarch64/vhigh_bits.clif
@@ -10,12 +10,12 @@ block0(v0: i8x16):
 ; VCode:
 ; block0:
 ; sshr v2.16b, v0.16b, #7
-; movz x5, #513
-; movk x5, x5, #2052, LSL #16
-; movk x5, x5, #8208, LSL #32
-; movk x5, x5, #32832, LSL #48
-; dup v16.2d, x5
-; and v22.16b, v2.16b, v16.16b
+; movz x7, #513
+; movk x7, x7, #2052, LSL #16
+; movk x7, x7, #8208, LSL #32
+; movk x7, x7, #32832, LSL #48
+; dup v20.2d, x7
+; and v22.16b, v2.16b, v20.16b
 ; ext v24.16b, v22.16b, v22.16b, #8
 ; zip1 v26.16b, v22.16b, v24.16b
 ; addv h28, v26.8h
@@ -25,12 +25,12 @@ block0(v0: i8x16):
 ; Disassembled:
 ; block0: ; offset 0x0
 ; sshr v2.16b, v0.16b, #7
-; mov x5, #0x201
-; movk x5, #0x804, lsl #16
-; movk x5, #0x2010, lsl #32
-; movk x5, #0x8040, lsl #48
-; dup v16.2d, x5
-; and v22.16b, v2.16b, v16.16b
+; mov x7, #0x201
+; movk x7, #0x804, lsl #16
+; movk x7, #0x2010, lsl #32
+; movk x7, #0x8040, lsl #48
+; dup v20.2d, x7
+; and v22.16b, v2.16b, v20.16b
 ; ext v24.16b, v22.16b, v22.16b, #8
 ; zip1 v26.16b, v22.16b, v24.16b
 ; addv h28, v26.8h
@@ -46,12 +46,12 @@ block0(v0: i8x16):
 ; VCode:
 ; block0:
 ; sshr v2.16b, v0.16b, #7
-; movz x5, #513
-; movk x5, x5, #2052, LSL #16
-; movk x5, x5, #8208, LSL #32
-; movk x5, x5, #32832, LSL #48
-; dup v16.2d, x5
-; and v22.16b, v2.16b, v16.16b
+; movz x7, #513
+; movk x7, x7, #2052, LSL #16
+; movk x7, x7, #8208, LSL #32
+; movk x7, x7, #32832, LSL #48
+; dup v20.2d, x7
+; and v22.16b, v2.16b, v20.16b
 ; ext v24.16b, v22.16b, v22.16b, #8
 ; zip1 v26.16b, v22.16b, v24.16b
 ; addv h28, v26.8h
@@ -61,12 +61,12 @@ block0(v0: i8x16):
 ; Disassembled:
 ; block0: ; offset 0x0
 ; sshr v2.16b, v0.16b, #7
-; mov x5, #0x201
-; movk x5, #0x804, lsl #16
-; movk x5, #0x2010, lsl #32
-; movk x5, #0x8040, lsl #48
-; dup v16.2d, x5
-; and v22.16b, v2.16b, v16.16b
+; mov x7, #0x201
+; movk x7, #0x804, lsl #16
+; movk x7, #0x2010, lsl #32
+; movk x7, #0x8040, lsl #48
+; dup v20.2d, x7
+; and v22.16b, v2.16b, v20.16b
 ; ext v24.16b, v22.16b, v22.16b, #8
 ; zip1 v26.16b, v22.16b, v24.16b
 ; addv h28, v26.8h
@@ -82,7 +82,7 @@ block0(v0: i16x8):
 ; VCode:
 ; block0:
 ; sshr v2.8h, v0.8h, #15
-; ldr q4, pc+8 ; b 20 ; data.f128 0x00800040002000100008000400020001
+; ldr q4, [const(0)]
 ; and v6.16b, v2.16b, v4.16b
 ; addv h16, v6.8h
 ; umov w0, v16.h[0]
@@ -91,16 +91,17 @@ block0(v0: i16x8):
 ; Disassembled:
 ; block0: ; offset 0x0
 ; sshr v2.8h, v0.8h, #0xf
-; ldr q4, #0xc
-; b #0x1c
-; .byte 0x01, 0x00, 0x02, 0x00
-; .byte 0x04, 0x00, 0x08, 0x00
-; .byte 0x10, 0x00, 0x20, 0x00
-; .byte 0x40, 0x00, 0x80, 0x00
+; ldr q4, #0x20
 ; and v6.16b, v2.16b, v4.16b
 ; addv h16, v6.8h
 ; umov w0, v16.h[0]
 ; ret
+; .byte 0x00, 0x00, 0x00, 0x00
+; .byte 0x00, 0x00, 0x00, 0x00
+; .byte 0x01, 0x00, 0x02, 0x00
+; .byte 0x04, 0x00, 0x08, 0x00
+; .byte 0x10, 0x00, 0x20, 0x00
+; .byte 0x40, 0x00, 0x80, 0x00
 
 function %f4(i32x4) -> i8 {
 block0(v0: i32x4):
@@ -111,7 +112,7 @@ block0(v0: i32x4):
 ; VCode:
 ; block0:
 ; sshr v2.4s, v0.4s, #31
-; ldr q4, pc+8 ; b 20 ; data.f128 0x00000008000000040000000200000001
+; ldr q4, [const(0)]
 ; and v6.16b, v2.16b, v4.16b
 ; addv s16, v6.4s
 ; mov w0, v16.s[0]
@@ -120,16 +121,17 @@ block0(v0: i32x4):
 ; Disassembled:
 ; block0: ; offset 0x0
 ; sshr v2.4s, v0.4s, #0x1f
-; ldr q4, #0xc
-; b #0x1c
-; .byte 0x01, 0x00, 0x00, 0x00
-; .byte 0x02, 0x00, 0x00, 0x00
-; .byte 0x04, 0x00, 0x00, 0x00
-; .byte 0x08, 0x00, 0x00, 0x00
+; ldr q4, #0x20
 ; and v6.16b, v2.16b, v4.16b
 ; addv s16, v6.4s
 ; mov w0, v16.s[0]
 ; ret
+; .byte 0x00, 0x00, 0x00, 0x00
+; .byte 0x00, 0x00, 0x00, 0x00
+; .byte 0x01, 0x00, 0x00, 0x00
+; .byte 0x02, 0x00, 0x00, 0x00
+; .byte 0x04, 0x00, 0x00, 0x00
+; .byte 0x08, 0x00, 0x00, 0x00
 
 function %f5(i64x2) -> i8 {
 block0(v0: i64x2):