diff --git a/cranelift/codegen/src/isa/aarch64/inst/emit.rs b/cranelift/codegen/src/isa/aarch64/inst/emit.rs
index 597c9ac592..15a3542b57 100644
--- a/cranelift/codegen/src/isa/aarch64/inst/emit.rs
+++ b/cranelift/codegen/src/isa/aarch64/inst/emit.rs
@@ -258,10 +258,6 @@ fn enc_ldst_vec(q: u32, size: u32, rn: Reg, rt: Writable<Reg>) -> u32 {
         | machreg_to_vec(rt.to_reg())
 }
 
-fn enc_extend(top22: u32, rd: Writable<Reg>, rn: Reg) -> u32 {
-    (top22 << 10) | (machreg_to_gpr(rn) << 5) | machreg_to_gpr(rd.to_reg())
-}
-
 fn enc_vec_rrr(top11: u32, rm: Reg, bit15_10: u32, rn: Reg, rd: Writable<Reg>) -> u32 {
     (top11 << 21)
         | (machreg_to_vec(rm) << 16)
@@ -313,6 +309,12 @@ fn enc_cset(rd: Writable<Reg>, cond: Cond) -> u32 {
         | (cond.invert().bits() << 12)
 }
 
+/// Encodes `csetm rd, cond`; the condition is stored inverted, shifted to bit 12.
+fn enc_csetm(rd: Writable<Reg>, cond: Cond) -> u32 {
+    let inverted_cond = cond.invert().bits() << 12;
+    0b110_11010100_11111_0000_00_11111_00000 | inverted_cond | machreg_to_gpr(rd.to_reg())
+}
+
 fn enc_ccmp_imm(size: OperandSize, rn: Reg, imm: UImm5, nzcv: NZCV, cond: Cond) -> u32 {
     0b0_1_1_11010010_00000_0000_10_00000_0_0000
         | size.sf_bit() << 31
@@ -322,6 +324,29 @@ fn enc_ccmp_imm(size: OperandSize, rn: Reg, imm: UImm5, nzcv: NZCV, cond: Cond)
         | nzcv.bits()
 }
 
+/// Encodes a bitfield-move instruction; the 2-bit `opc` field selects the variant.
+fn enc_bfm(opc: u8, size: OperandSize, rd: Writable<Reg>, rn: Reg, immr: u8, imms: u8) -> u32 {
+    // Both immediates must fit the operand width: at most 63 for 64-bit, 31 for 32-bit.
+    let max_imm = match size {
+        OperandSize::Size64 => 63,
+        OperandSize::Size32 => 31,
+    };
+    debug_assert!(immr <= max_imm);
+    debug_assert!(imms <= max_imm);
+    debug_assert_eq!(opc & 0b11, opc);
+    // The N field (bit 22) is set to the same value as sf (bit 31).
+    let sf = size.sf_bit();
+    let base = 0b0_00_100110_0_000000_000000_00000_00000;
+    let fields = sf << 31
+        | u32::from(opc) << 29
+        | sf << 22
+        | u32::from(immr) << 16
+        | u32::from(imms) << 10
+        | machreg_to_gpr(rn) << 5
+        | machreg_to_gpr(rd.to_reg());
+    base | fields
+}
+
 fn enc_vecmov(is_16b: bool, rd: Writable<Reg>, rn: Reg) -> u32 {
     0b00001110_101_00000_00011_1_00000_00000
         | ((is_16b as u32) << 30)
@@ -1020,6 +1045,9 @@ impl MachInstEmit for Inst {
             &Inst::CSet { rd, cond } => {
                 sink.put4(enc_cset(rd, cond));
             }
+            &Inst::CSetm { rd, cond } => {
+                sink.put4(enc_csetm(rd, cond));
+            }
             &Inst::CCmpImm {
                 size,
                 rn,
@@ -1958,75 +1986,47 @@ impl MachInstEmit for Inst {
             &Inst::Extend {
                 rd,
                 rn,
-                signed,
-                from_bits,
+                signed: false,
+                from_bits: 1,
                 to_bits,
-            } if from_bits >= 8 => {
-                let top22 = match (signed, from_bits, to_bits) {
-                    (false, 8, 32) => 0b010_100110_0_000000_000111, // UXTB (32)
-                    (false, 16, 32) => 0b010_100110_0_000000_001111, // UXTH (32)
-                    (true, 8, 32) => 0b000_100110_0_000000_000111,  // SXTB (32)
-                    (true, 16, 32) => 0b000_100110_0_000000_001111, // SXTH (32)
-                    // The 64-bit unsigned variants are the same as the 32-bit ones,
-                    // because writes to Wn zero out the top 32 bits of Xn
-                    (false, 8, 64) => 0b010_100110_0_000000_000111, // UXTB (64)
-                    (false, 16, 64) => 0b010_100110_0_000000_001111, // UXTH (64)
-                    (true, 8, 64) => 0b100_100110_1_000000_000111,  // SXTB (64)
-                    (true, 16, 64) => 0b100_100110_1_000000_001111, // SXTH (64)
-                    // 32-to-64: the unsigned case is a 'mov' (special-cased below).
-                    (false, 32, 64) => 0,                           // MOV
-                    (true, 32, 64) => 0b100_100110_1_000000_011111, // SXTW (64)
-                    _ => panic!(
-                        "Unsupported extend combination: signed = {}, from_bits = {}, to_bits = {}",
-                        signed, from_bits, to_bits
-                    ),
-                };
-                if top22 != 0 {
-                    sink.put4(enc_extend(top22, rd, rn));
-                } else {
-                    let mov = Inst::Mov32 { rd, rm: rn };
-
-                    mov.emit(sink, emit_info, state);
-                }
-            }
-            &Inst::Extend {
-                rd,
-                rn,
-                signed,
-                from_bits,
-                to_bits,
-            } if from_bits == 1 && signed => {
-                assert!(to_bits <= 64);
-                // Reduce sign-extend-from-1-bit to:
-                // - and rd, rn, #1
-                // - sub rd, zr, rd
-
-                // We don't have ImmLogic yet, so we just hardcode this. FIXME.
-                sink.put4(0x92400000 | (machreg_to_gpr(rn) << 5) | machreg_to_gpr(rd.to_reg()));
-                let sub_inst = Inst::AluRRR {
-                    alu_op: ALUOp::Sub64,
-                    rd,
-                    rn: zero_reg(),
-                    rm: rd.to_reg(),
-                };
-                sub_inst.emit(sink, emit_info, state);
-            }
-            &Inst::Extend {
-                rd,
-                rn,
-                signed,
-                from_bits,
-                to_bits,
-            } if from_bits == 1 && !signed => {
+            } => {
                 assert!(to_bits <= 64);
                 // Reduce zero-extend-from-1-bit to:
                 // - and rd, rn, #1
-
-                // We don't have ImmLogic yet, so we just hardcode this. FIXME.
-                sink.put4(0x92400000 | (machreg_to_gpr(rn) << 5) | machreg_to_gpr(rd.to_reg()));
+                // Note: This is special cased as UBFX may take more cycles
+                // than AND on smaller cores.
+                let imml = ImmLogic::maybe_from_u64(1, I32).unwrap();
+                Inst::AluRRImmLogic {
+                    alu_op: ALUOp::And32,
+                    rd,
+                    rn,
+                    imml,
+                }
+                .emit(sink, emit_info, state);
             }
-            &Inst::Extend { .. } => {
-                panic!("Unsupported extend variant");
+            &Inst::Extend {
+                rd,
+                rn,
+                signed: false,
+                from_bits: 32,
+                to_bits: 64,
+            } => {
+                let mov = Inst::Mov32 { rd, rm: rn };
+                mov.emit(sink, emit_info, state);
+            }
+            &Inst::Extend {
+                rd,
+                rn,
+                signed,
+                from_bits,
+                to_bits,
+            } => {
+                let (opc, size) = if signed {
+                    (0b00, OperandSize::from_bits(to_bits))
+                } else {
+                    (0b10, OperandSize::Size32)
+                };
+                sink.put4(enc_bfm(opc, size, rd, rn, 0, from_bits - 1));
             }
             &Inst::Jump { ref dest } => {
                 let off = sink.cur_offset();
diff --git a/cranelift/codegen/src/isa/aarch64/inst/emit_tests.rs b/cranelift/codegen/src/isa/aarch64/inst/emit_tests.rs
index 74aac428ef..6f79a5c7d1 100644
--- a/cranelift/codegen/src/isa/aarch64/inst/emit_tests.rs
+++ b/cranelift/codegen/src/isa/aarch64/inst/emit_tests.rs
@@ -1784,6 +1784,22 @@ fn test_aarch64_binemit() {
         "EFB79F9A",
         "cset x15, ge",
     ));
+    insns.push((
+        Inst::CSetm {
+            rd: writable_xreg(0),
+            cond: Cond::Eq,
+        },
+        "E0139FDA",
+        "csetm x0, eq",
+    ));
+    insns.push((
+        Inst::CSetm {
+            rd: writable_xreg(16),
+            cond: Cond::Vs,
+        },
+        "F0739FDA",
+        "csetm x16, vs",
+    ));
     insns.push((
         Inst::CCmpImm {
             size: OperandSize::Size64,
@@ -3890,6 +3906,50 @@ fn test_aarch64_binemit() {
         "vcsel v5.16b, v10.16b, v19.16b, gt (if-then-else diamond)",
     ));
 
+    insns.push((
+        Inst::Extend {
+            rd: writable_xreg(3),
+            rn: xreg(5),
+            signed: false,
+            from_bits: 1,
+            to_bits: 32,
+        },
+        "A3000012",
+        "and w3, w5, #1",
+    ));
+    insns.push((
+        Inst::Extend {
+            rd: writable_xreg(3),
+            rn: xreg(5),
+            signed: false,
+            from_bits: 1,
+            to_bits: 64,
+        },
+        "A3000012",
+        "and w3, w5, #1",
+    ));
+    insns.push((
+        Inst::Extend {
+            rd: writable_xreg(10),
+            rn: xreg(21),
+            signed: true,
+            from_bits: 1,
+            to_bits: 32,
+        },
+        "AA020013",
+        "sbfx w10, w21, #0, #1",
+    ));
+    insns.push((
+        Inst::Extend {
+            rd: writable_xreg(1),
+            rn: xreg(2),
+            signed: true,
+            from_bits: 1,
+            to_bits: 64,
+        },
+        "41004093",
+        "sbfx x1, x2, #0, #1",
+    ));
     insns.push((
         Inst::Extend {
             rd: writable_xreg(1),
@@ -3943,7 +4003,7 @@ fn test_aarch64_binemit() {
             to_bits: 64,
         },
         "411C0053",
-        "uxtb x1, w2",
+        "uxtb w1, w2",
     ));
     insns.push((
         Inst::Extend {
@@ -3965,7 +4025,7 @@ fn test_aarch64_binemit() {
             to_bits: 64,
         },
         "413C0053",
-        "uxth x1, w2",
+        "uxth w1, w2",
     ));
     insns.push((
         Inst::Extend {
diff --git a/cranelift/codegen/src/isa/aarch64/inst/mod.rs b/cranelift/codegen/src/isa/aarch64/inst/mod.rs
index d09637298c..c24344a303 100644
--- a/cranelift/codegen/src/isa/aarch64/inst/mod.rs
+++ b/cranelift/codegen/src/isa/aarch64/inst/mod.rs
@@ -648,6 +648,12 @@ pub enum Inst {
         cond: Cond,
     },
 
+    /// A conditional-set-mask operation.
+    CSetm {
+        rd: Writable<Reg>,
+        cond: Cond,
+    },
+
     /// A conditional comparison with an immediate.
     CCmpImm {
         size: OperandSize,
@@ -1596,7 +1602,7 @@ fn aarch64_get_regs(inst: &Inst, collector: &mut RegUsageCollector) {
             collector.add_use(rn);
             collector.add_use(rm);
         }
-        &Inst::CSet { rd, .. } => {
+        &Inst::CSet { rd, .. } | &Inst::CSetm { rd, .. } => {
             collector.add_def(rd);
         }
         &Inst::CCmpImm { rn, .. } => {
@@ -2162,7 +2168,7 @@ fn aarch64_map_regs<RUM: RegUsageMapper>(inst: &mut Inst, mapper: &RUM) {
             map_use(mapper, rn);
             map_use(mapper, rm);
         }
-        &mut Inst::CSet { ref mut rd, .. } => {
+        &mut Inst::CSet { ref mut rd, .. } | &mut Inst::CSetm { ref mut rd, .. } => {
             map_def(mapper, rd);
         }
         &mut Inst::CCmpImm { ref mut rn, .. } => {
@@ -3108,6 +3114,11 @@ impl Inst {
                 let cond = cond.show_rru(mb_rru);
                 format!("cset {}, {}", rd, cond)
             }
+            &Inst::CSetm { rd, cond } => {
+                let rd = rd.to_reg().show_rru(mb_rru);
+                let cond = cond.show_rru(mb_rru);
+                format!("csetm {}, {}", rd, cond)
+            }
             &Inst::CCmpImm {
                 size,
                 rn,
@@ -3628,63 +3639,60 @@ impl Inst {
             &Inst::Extend {
                 rd,
                 rn,
-                signed,
-                from_bits,
-                to_bits,
-            } if from_bits >= 8 => {
-                // Is the destination a 32-bit register? Corresponds to whether
-                // extend-to width is <= 32 bits, *unless* we have an unsigned
-                // 32-to-64-bit extension, which is implemented with a "mov" to a
-                // 32-bit (W-reg) dest, because this zeroes the top 32 bits.
-                let dest_size = if !signed && from_bits == 32 && to_bits == 64 {
-                    OperandSize::Size32
-                } else {
-                    OperandSize::from_bits(to_bits)
-                };
-                let rd = show_ireg_sized(rd.to_reg(), mb_rru, dest_size);
-                let rn = show_ireg_sized(rn, mb_rru, OperandSize::from_bits(from_bits));
-                let op = match (signed, from_bits, to_bits) {
-                    (false, 8, 32) => "uxtb",
-                    (true, 8, 32) => "sxtb",
-                    (false, 16, 32) => "uxth",
-                    (true, 16, 32) => "sxth",
-                    (false, 8, 64) => "uxtb",
-                    (true, 8, 64) => "sxtb",
-                    (false, 16, 64) => "uxth",
-                    (true, 16, 64) => "sxth",
-                    (false, 32, 64) => "mov", // special case (see above).
-                    (true, 32, 64) => "sxtw",
-                    _ => panic!("Unsupported Extend case: {:?}", self),
-                };
-                format!("{} {}, {}", op, rd, rn)
-            }
-            &Inst::Extend {
-                rd,
-                rn,
-                signed,
-                from_bits,
-                to_bits,
-            } if from_bits == 1 && signed => {
-                let dest_size = OperandSize::from_bits(to_bits);
-                let zr = if dest_size.is32() { "wzr" } else { "xzr" };
-                let rd32 = show_ireg_sized(rd.to_reg(), mb_rru, OperandSize::Size32);
-                let rd = show_ireg_sized(rd.to_reg(), mb_rru, dest_size);
-                let rn = show_ireg_sized(rn, mb_rru, OperandSize::Size32);
-                format!("and {}, {}, #1 ; sub {}, {}, {}", rd32, rn, rd, zr, rd)
-            }
-            &Inst::Extend {
-                rd,
-                rn,
-                signed,
-                from_bits,
+                signed: false,
+                from_bits: 1,
                 ..
-            } if from_bits == 1 && !signed => {
+            } => {
                 let rd = show_ireg_sized(rd.to_reg(), mb_rru, OperandSize::Size32);
                 let rn = show_ireg_sized(rn, mb_rru, OperandSize::Size32);
                 format!("and {}, {}, #1", rd, rn)
             }
-            &Inst::Extend { .. } => {
-                panic!("Unsupported Extend case");
+            &Inst::Extend {
+                rd,
+                rn,
+                signed: false,
+                from_bits: 32,
+                to_bits: 64,
+            } => {
+                // A zero extension from 32 to 64 bits is implemented with a
+                // "mov" to a 32-bit (W-reg) dest, because writing a W register
+                // zeroes the top 32 bits.
+                let rd = show_ireg_sized(rd.to_reg(), mb_rru, OperandSize::Size32);
+                let rn = show_ireg_sized(rn, mb_rru, OperandSize::Size32);
+                format!("mov {}, {}", rd, rn)
+            }
+            &Inst::Extend {
+                rd,
+                rn,
+                signed,
+                from_bits,
+                to_bits,
+            } => {
+                assert!(from_bits <= to_bits);
+                // Unsigned extends are encoded as 32-bit operations, hence W registers.
+                let dest_size = if signed {
+                    OperandSize::from_bits(to_bits)
+                } else {
+                    OperandSize::Size32
+                };
+                let rd = show_ireg_sized(rd.to_reg(), mb_rru, dest_size);
+                // Widths with a dedicated alias print as such; others as a bitfield extract.
+                let alias = match (signed, from_bits) {
+                    (false, 8) => Some("uxtb"),
+                    (true, 8) => Some("sxtb"),
+                    (false, 16) => Some("uxth"),
+                    (true, 16) => Some("sxth"),
+                    (true, 32) => Some("sxtw"),
+                    _ => None,
+                };
+                if let Some(op) = alias {
+                    let rn = show_ireg_sized(rn, mb_rru, OperandSize::from_bits(from_bits));
+                    format!("{} {}, {}", op, rd, rn)
+                } else {
+                    let op = if signed { "sbfx" } else { "ubfx" };
+                    let rn = show_ireg_sized(rn, mb_rru, dest_size);
+                    format!("{} {}, {}, #0, #{}", op, rd, rn, from_bits)
+                }
+            }
             &Inst::Call { .. } => format!("bl 0"),
             &Inst::CallInd { ref info, .. } => {
diff --git a/cranelift/codegen/src/isa/aarch64/lower.rs b/cranelift/codegen/src/isa/aarch64/lower.rs
index 8c94aad5fb..45b3b4b832 100644
--- a/cranelift/codegen/src/isa/aarch64/lower.rs
+++ b/cranelift/codegen/src/isa/aarch64/lower.rs
@@ -1152,21 +1152,21 @@ pub(crate) fn lower_fcmp_or_ffcmp_to_flags<C: LowerCtx<I = Inst>>(ctx: &mut C, i
     }
 }
 
-/// Convert a 0 / 1 result, such as from a conditional-set instruction, into a 0
-/// / -1 (all-ones) result as expected for bool operations.
-pub(crate) fn normalize_bool_result<C: LowerCtx<I = Inst>>(
+/// Materialize a boolean value into a register from the flags
+/// (e.g. set by a comparison). The result is 0 / -1 (all-ones) for bool
+/// types wider than one bit, and 0 / 1 for a one-bit bool.
+pub(crate) fn materialize_bool_result<C: LowerCtx<I = Inst>>(
     ctx: &mut C,
     insn: IRInst,
     rd: Writable<Reg>,
+    cond: Cond,
 ) {
-    // A boolean is 0 / -1; if output width is > 1, negate.
+    // A boolean is 0 / -1; if output width is > 1 use `csetm`,
+    // otherwise use `cset`.
     if ty_bits(ctx.output_ty(insn, 0)) > 1 {
-        ctx.emit(Inst::AluRRR {
-            alu_op: ALUOp::Sub64,
-            rd,
-            rn: zero_reg(),
-            rm: rd.to_reg(),
-        });
+        ctx.emit(Inst::CSetm { rd, cond });
+    } else {
+        ctx.emit(Inst::CSet { rd, cond });
     }
 }
 
diff --git a/cranelift/codegen/src/isa/aarch64/lower_inst.rs b/cranelift/codegen/src/isa/aarch64/lower_inst.rs
index 8113221ab5..35bbdc7ee8 100644
--- a/cranelift/codegen/src/isa/aarch64/lower_inst.rs
+++ b/cranelift/codegen/src/isa/aarch64/lower_inst.rs
@@ -1521,8 +1521,7 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
             let ifcmp_insn = maybe_input_insn(ctx, inputs[0], Opcode::Ifcmp).unwrap();
             lower_icmp_or_ifcmp_to_flags(ctx, ifcmp_insn, is_signed);
             let rd = get_output_reg(ctx, outputs[0]);
-            ctx.emit(Inst::CSet { rd, cond });
-            normalize_bool_result(ctx, insn, rd);
+            materialize_bool_result(ctx, insn, rd, cond);
         }
 
         Opcode::Trueff => {
@@ -1531,8 +1530,7 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
             let ffcmp_insn = maybe_input_insn(ctx, inputs[0], Opcode::Ffcmp).unwrap();
             lower_fcmp_or_ffcmp_to_flags(ctx, ffcmp_insn);
             let rd = get_output_reg(ctx, outputs[0]);
-            ctx.emit(Inst::CSet { rd, cond });
-            normalize_bool_result(ctx, insn, rd);
+            materialize_bool_result(ctx, insn, rd, cond);
         }
 
         Opcode::IsNull | Opcode::IsInvalid => {
@@ -1555,8 +1553,7 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
             };
             let const_value = ResultRSEImm12::Imm12(Imm12::maybe_from_u64(const_value).unwrap());
             ctx.emit(alu_inst_imm12(alu_op, writable_zero_reg(), rn, const_value));
-            ctx.emit(Inst::CSet { rd, cond: Cond::Eq });
-            normalize_bool_result(ctx, insn, rd);
+            materialize_bool_result(ctx, insn, rd, Cond::Eq);
         }
 
         Opcode::Copy => {
@@ -1581,11 +1578,6 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
             //   sign-extend the -1 to a -1 in the wider width.
             // - Bmask, because the resulting integer mask value must be
             //   all-ones (-1) if the argument is true.
-            //
-            // For a sign-extension from a 1-bit value (Case 1 below), we need
-            // to do things a bit specially, because the ISA does not have a
-            // 1-to-N-bit sign extension instruction.  For 8-bit or wider
-            // sources (Case 2 below), we do a sign extension normally.
 
             let from_ty = ctx.input_ty(insn, 0);
             let to_ty = ctx.output_ty(insn, 0);
@@ -1600,41 +1592,23 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
 
             if from_bits == to_bits {
                 // Nothing.
-            } else if from_bits == 1 {
-                assert!(to_bits >= 8);
-                // Case 1: 1-bit to N-bit extension: AND the LSB of source into
-                // dest, generating a value of 0 or 1, then negate to get
-                // 0x000... or 0xfff...
+            } else {
                 let rn = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None);
                 let rd = get_output_reg(ctx, outputs[0]);
-                // AND Rdest, Rsource, #1
-                ctx.emit(Inst::AluRRImmLogic {
-                    alu_op: ALUOp::And64,
-                    rd,
-                    rn,
-                    imml: ImmLogic::maybe_from_u64(1, I64).unwrap(),
-                });
-                // SUB Rdest, XZR, Rdest  (i.e., NEG Rdest)
-                ctx.emit(Inst::AluRRR {
-                    alu_op: ALUOp::Sub64,
-                    rd,
-                    rn: zero_reg(),
-                    rm: rd.to_reg(),
-                });
-            } else {
-                // Case 2: 8-or-more-bit to N-bit extension: just sign-extend. A
-                // `true` (all ones, or `-1`) will be extended to -1 with the
-                // larger width.
-                assert!(from_bits >= 8);
-                let narrow_mode = if to_bits == 64 {
-                    NarrowValueMode::SignExtend64
+                let to_bits = if to_bits == 64 {
+                    64
                 } else {
                     assert!(to_bits <= 32);
-                    NarrowValueMode::SignExtend32
+                    32
                 };
-                let rn = put_input_in_reg(ctx, inputs[0], narrow_mode);
-                let rd = get_output_reg(ctx, outputs[0]);
-                ctx.emit(Inst::gen_move(rd, rn, to_ty));
+                let from_bits = from_bits as u8;
+                ctx.emit(Inst::Extend {
+                    rd,
+                    rn,
+                    signed: true,
+                    from_bits,
+                    to_bits,
+                });
             }
         }
 
@@ -1745,8 +1719,7 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
                 let alu_op = choose_32_64(ty, ALUOp::SubS32, ALUOp::SubS64);
                 let rm = put_input_in_rse_imm12(ctx, inputs[1], narrow_mode);
                 ctx.emit(alu_inst_imm12(alu_op, writable_zero_reg(), rn, rm));
-                ctx.emit(Inst::CSet { cond, rd });
-                normalize_bool_result(ctx, insn, rd);
+                materialize_bool_result(ctx, insn, rd, cond);
             } else {
                 let rm = put_input_in_reg(ctx, inputs[1], narrow_mode);
                 lower_vector_compare(ctx, rd, rn, rm, ty, cond)?;
@@ -1771,8 +1744,7 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
                     }
                     _ => panic!("Bad float size"),
                 }
-                ctx.emit(Inst::CSet { cond, rd });
-                normalize_bool_result(ctx, insn, rd);
+                materialize_bool_result(ctx, insn, rd, cond);
             } else {
                 lower_vector_compare(ctx, rd, rn, rm, ty, cond)?;
             }
@@ -2105,8 +2077,7 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
                 imm12: Imm12::zero(),
             });
 
-            ctx.emit(Inst::CSet { rd, cond: Cond::Ne });
-            normalize_bool_result(ctx, insn, rd);
+            materialize_bool_result(ctx, insn, rd, Cond::Ne);
         }
 
         Opcode::VhighBits => {
diff --git a/cranelift/filetests/filetests/isa/aarch64/bitops.clif b/cranelift/filetests/filetests/isa/aarch64/bitops.clif
index c7ddd04608..ab1c113104 100644
--- a/cranelift/filetests/filetests/isa/aarch64/bitops.clif
+++ b/cranelift/filetests/filetests/isa/aarch64/bitops.clif
@@ -281,7 +281,7 @@ block0(v0: i16):
 
 ; check: stp fp, lr, [sp, #-16]!
 ; nextln: mov fp, sp
-; nextln: uxth x0, w0
+; nextln: uxth w0, w0
 ; nextln: lsr w1, w0, #1
 ; nextln: and x1, x1, #6148914691236517205
 ; nextln: sub x1, x0, x1
@@ -307,7 +307,7 @@ block0(v0: i8):
 
 ; check: stp fp, lr, [sp, #-16]!
 ; nextln: mov fp, sp
-; nextln: uxtb x0, w0
+; nextln: uxtb w0, w0
 ; nextln: lsr w1, w0, #1
 ; nextln: and x1, x1, #6148914691236517205
 ; nextln: sub x1, x0, x1
@@ -324,3 +324,33 @@ block0(v0: i8):
 ; nextln: mov sp, fp
 ; nextln: ldp fp, lr, [sp], #16
 ; nextln: ret
+
+function %bextend_b8() -> b32 {
+block0:
+    v1 = bconst.b8 true
+    v2 = bextend.b32 v1
+    return v2
+}
+
+; check: stp fp, lr, [sp, #-16]!
+; nextln: mov fp, sp
+; nextln: movz x0, #255
+; nextln: sxtb w0, w0
+; nextln: mov sp, fp
+; nextln: ldp fp, lr, [sp], #16
+; nextln: ret
+
+function %bextend_b1() -> b32 {
+block0:
+    v1 = bconst.b1 true
+    v2 = bextend.b32 v1
+    return v2
+}
+
+; check: stp fp, lr, [sp, #-16]!
+; nextln: mov fp, sp
+; nextln: movz x0, #1
+; nextln: sbfx w0, w0, #0, #1
+; nextln: mov sp, fp
+; nextln: ldp fp, lr, [sp], #16
+; nextln: ret
diff --git a/cranelift/filetests/filetests/isa/aarch64/saturating-ops.clif b/cranelift/filetests/filetests/isa/aarch64/saturating-ops.clif
index 7116205dd5..e0bf7c5b3f 100644
--- a/cranelift/filetests/filetests/isa/aarch64/saturating-ops.clif
+++ b/cranelift/filetests/filetests/isa/aarch64/saturating-ops.clif
@@ -25,8 +25,8 @@ block0(v0: i8, v1: i8):
 
 ; check: stp fp, lr, [sp, #-16]!
 ; nextln: mov fp, sp
-; nextln: uxtb x0, w0
-; nextln: uxtb x1, w1
+; nextln: uxtb w0, w0
+; nextln: uxtb w1, w1
 ; nextln: fmov d0, x0
 ; nextln: fmov d1, x1
 ; nextln: uqadd d0, d0, d1
diff --git a/cranelift/filetests/filetests/isa/aarch64/uextend-sextend.clif b/cranelift/filetests/filetests/isa/aarch64/uextend-sextend.clif
index 803f144844..8823351207 100644
--- a/cranelift/filetests/filetests/isa/aarch64/uextend-sextend.clif
+++ b/cranelift/filetests/filetests/isa/aarch64/uextend-sextend.clif
@@ -9,7 +9,7 @@ block0(v0: i8):
 
 ; check: stp fp, lr, [sp, #-16]!
 ; nextln: mov fp, sp
-; nextln: uxtb x0, w0
+; nextln: uxtb w0, w0
 ; nextln: mov sp, fp
 ; nextln: ldp fp, lr, [sp], #16
 ; nextln: ret
@@ -87,7 +87,7 @@ block0(v0: i16):
 
 ; check: stp fp, lr, [sp, #-16]!
 ; nextln: mov fp, sp
-; nextln: uxth x0, w0
+; nextln: uxth w0, w0
 ; nextln: mov sp, fp
 ; nextln: ldp fp, lr, [sp], #16
 ; nextln: ret