diff --git a/cranelift/codegen/src/isa/aarch64/inst/emit.rs b/cranelift/codegen/src/isa/aarch64/inst/emit.rs index 0b93effada..302f388d35 100644 --- a/cranelift/codegen/src/isa/aarch64/inst/emit.rs +++ b/cranelift/codegen/src/isa/aarch64/inst/emit.rs @@ -1380,13 +1380,6 @@ impl MachInstEmit for Inst { &Inst::MovFromNZCV { rd } => { sink.put4(0xd53b4200 | machreg_to_gpr(rd.to_reg())); } - &Inst::CondSet { rd, cond } => { - sink.put4( - 0b100_11010100_11111_0000_01_11111_00000 - | (cond.invert().bits() << 12) - | machreg_to_gpr(rd.to_reg()), - ); - } &Inst::Extend { rd, rn, diff --git a/cranelift/codegen/src/isa/aarch64/inst/emit_tests.rs b/cranelift/codegen/src/isa/aarch64/inst/emit_tests.rs index a06111e897..827601a611 100644 --- a/cranelift/codegen/src/isa/aarch64/inst/emit_tests.rs +++ b/cranelift/codegen/src/isa/aarch64/inst/emit_tests.rs @@ -1888,14 +1888,6 @@ fn test_aarch64_binemit() { "1B423BD5", "mrs x27, nzcv", )); - insns.push(( - Inst::CondSet { - rd: writable_xreg(5), - cond: Cond::Hi, - }, - "E5979F9A", - "cset x5, hi", - )); insns.push(( Inst::VecDup { rd: writable_vreg(25), diff --git a/cranelift/codegen/src/isa/aarch64/inst/mod.rs b/cranelift/codegen/src/isa/aarch64/inst/mod.rs index eb9c159670..cb3e992519 100644 --- a/cranelift/codegen/src/isa/aarch64/inst/mod.rs +++ b/cranelift/codegen/src/isa/aarch64/inst/mod.rs @@ -809,12 +809,6 @@ pub enum Inst { rd: Writable<Reg>, }, - /// Set a register to 1 if condition, else 0. - CondSet { - rd: Writable<Reg>, - cond: Cond, - }, - /// A machine call instruction. N.B.: this allows only a +/- 128MB offset (it uses a relocation /// of type `Reloc::Arm64Call`); if the destination distance is not `RelocDistance::Near`, the /// code should use a `LoadExtName` / `CallInd` sequence instead, allowing an arbitrary 64-bit @@ -1358,9 +1352,6 @@ fn aarch64_get_regs(inst: &Inst, collector: &mut RegUsageCollector) { &Inst::MovFromNZCV { rd } => { collector.add_def(rd); } - &Inst::CondSet { rd, ..
} => { - collector.add_def(rd); - } &Inst::Extend { rd, rn, .. } => { collector.add_def(rd); collector.add_use(rn); @@ -1954,9 +1945,6 @@ fn aarch64_map_regs<RUM: RegUsageMapper>(inst: &mut Inst, mapper: &RUM) { &mut Inst::MovFromNZCV { ref mut rd } => { map_def(mapper, rd); } - &mut Inst::CondSet { ref mut rd, .. } => { - map_def(mapper, rd); - } &mut Inst::Extend { ref mut rd, ref mut rn, @@ -2830,11 +2818,6 @@ impl Inst { let rd = rd.to_reg().show_rru(mb_rru); format!("mrs {}, nzcv", rd) } - &Inst::CondSet { rd, cond } => { - let rd = rd.to_reg().show_rru(mb_rru); - let cond = cond.show_rru(mb_rru); - format!("cset {}, {}", rd, cond) - } &Inst::Extend { rd, rn, diff --git a/cranelift/codegen/src/isa/aarch64/lower.rs b/cranelift/codegen/src/isa/aarch64/lower.rs index dedba775ae..d8011d0789 100644 --- a/cranelift/codegen/src/isa/aarch64/lower.rs +++ b/cranelift/codegen/src/isa/aarch64/lower.rs @@ -1014,6 +1014,24 @@ pub(crate) fn lower_fcmp_or_ffcmp_to_flags<C: LowerCtx<I = Inst>>(ctx: &mut C, i } } +/// Convert a 0 / 1 result, such as from a conditional-set instruction, into a 0 +/// / -1 (all-ones) result as expected for bool operations. +pub(crate) fn normalize_bool_result<C: LowerCtx<I = Inst>>( + ctx: &mut C, + insn: IRInst, + rd: Writable<Reg>, +) { + // A boolean is 0 / -1; if output width is > 1, negate. + if ty_bits(ctx.output_ty(insn, 0)) > 1 { + ctx.emit(Inst::AluRRR { + alu_op: ALUOp::Sub64, + rd, + rn: zero_reg(), + rm: rd.to_reg(), + }); + } +} + //============================================================================= // Lowering-backend trait implementation.
diff --git a/cranelift/codegen/src/isa/aarch64/lower_inst.rs b/cranelift/codegen/src/isa/aarch64/lower_inst.rs index 9dec7f9c7f..761f18130c 100644 --- a/cranelift/codegen/src/isa/aarch64/lower_inst.rs +++ b/cranelift/codegen/src/isa/aarch64/lower_inst.rs @@ -1208,6 +1208,7 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>( lower_icmp_or_ifcmp_to_flags(ctx, ifcmp_insn, is_signed); let rd = get_output_reg(ctx, outputs[0]); ctx.emit(Inst::CSet { rd, cond }); + normalize_bool_result(ctx, insn, rd); } Opcode::Trueff => { @@ -1217,6 +1218,7 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>( lower_fcmp_or_ffcmp_to_flags(ctx, ffcmp_insn); let rd = get_output_reg(ctx, outputs[0]); ctx.emit(Inst::CSet { rd, cond }); + normalize_bool_result(ctx, insn, rd); } Opcode::IsNull | Opcode::IsInvalid => { @@ -1240,6 +1242,7 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>( let const_value = ResultRSEImm12::Imm12(Imm12::maybe_from_u64(const_value).unwrap()); ctx.emit(alu_inst_imm12(alu_op, writable_zero_reg(), rn, const_value)); ctx.emit(Inst::CSet { rd, cond: Cond::Eq }); + normalize_bool_result(ctx, insn, rd); } Opcode::Copy => { @@ -1249,49 +1252,95 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>( ctx.emit(Inst::gen_move(rd, rn, ty)); } - Opcode::Bint | Opcode::Breduce | Opcode::Bextend | Opcode::Ireduce => { - // If this is a Bint from a Trueif/Trueff/IsNull/IsInvalid, then the result is already - // 64-bit-zero-extended, even if the CLIF type doesn't say so, because it was produced - // by a CSet. In this case, we do not need to do any zero-extension. - let input_info = ctx.get_input(insn, 0); - let src_op = input_info - .inst - .map(|(src_inst, _)| ctx.data(src_inst).opcode()); - let narrow_mode = match (src_op, op) { - (Some(Opcode::Trueif), Opcode::Bint) - | (Some(Opcode::Trueff), Opcode::Bint) - | (Some(Opcode::IsNull), Opcode::Bint) - | (Some(Opcode::IsInvalid), Opcode::Bint) => NarrowValueMode::None, - _ => NarrowValueMode::ZeroExtend64, - }; - - // All of these ops are simply a move from a zero-extended source.
- // Here is why this works, in each case: - // - // - Bint: Bool-to-int. We always represent a bool as a 0 or 1, so we - // merely need to zero-extend here. - // - // - Breduce, Bextend: changing width of a boolean. We represent a - // bool as a 0 or 1, so again, this is a zero-extend / no-op. - // - // - Ireduce: changing width of an integer. Smaller ints are stored - // with undefined high-order bits, so we can simply do a copy. - - let rn = put_input_in_reg(ctx, inputs[0], narrow_mode); + Opcode::Breduce | Opcode::Ireduce => { + // Smaller integers/booleans are stored with high-order bits + // undefined, so we can simply do a copy. + let rn = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None); let rd = get_output_reg(ctx, outputs[0]); let ty = ctx.input_ty(insn, 0); ctx.emit(Inst::gen_move(rd, rn, ty)); } - Opcode::Bmask => { - // Bool is {0, 1}, so we can subtract from 0 to get all-1s. + Opcode::Bextend | Opcode::Bmask => { + // Bextend and Bmask both simply sign-extend. This works for: + // - Bextend, because booleans are stored as 0 / -1, so we + // sign-extend the -1 to a -1 in the wider width. + // - Bmask, because the resulting integer mask value must be + // all-ones (-1) if the argument is true. + // + // For a sign-extension from a 1-bit value (Case 1 below), we need + // to do things a bit specially, because the ISA does not have a + // 1-to-N-bit sign extension instruction. For 8-bit or wider + // sources (Case 2 below), we do a sign extension normally. + + let from_ty = ctx.input_ty(insn, 0); + let to_ty = ctx.output_ty(insn, 0); + let from_bits = ty_bits(from_ty); + let to_bits = ty_bits(to_ty); + + assert!( + from_bits <= 64 && to_bits <= 64, + "Vector Bextend not supported yet" + ); + assert!(from_bits <= to_bits); + + if from_bits == to_bits { + // Nothing. 
+ } else if from_bits == 1 { + assert!(to_bits >= 8); + // Case 1: 1-bit to N-bit extension: AND the LSB of source into + // dest, generating a value of 0 or 1, then negate to get + // 0x000... or 0xfff... + let rn = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None); + let rd = get_output_reg(ctx, outputs[0]); + // AND Rdest, Rsource, #1 + ctx.emit(Inst::AluRRImmLogic { + alu_op: ALUOp::And64, + rd, + rn, + imml: ImmLogic::maybe_from_u64(1, I64).unwrap(), + }); + // SUB Rdest, XZR, Rdest (i.e., NEG Rdest) + ctx.emit(Inst::AluRRR { + alu_op: ALUOp::Sub64, + rd, + rn: zero_reg(), + rm: rd.to_reg(), + }); + } else { + // Case 2: 8-or-more-bit to N-bit extension: just sign-extend. A + // `true` (all ones, or `-1`) will be extended to -1 with the + // larger width. + assert!(from_bits >= 8); + let narrow_mode = if to_bits == 64 { + NarrowValueMode::SignExtend64 + } else { + assert!(to_bits <= 32); + NarrowValueMode::SignExtend32 + }; + let rn = put_input_in_reg(ctx, inputs[0], narrow_mode); + let rd = get_output_reg(ctx, outputs[0]); + ctx.emit(Inst::gen_move(rd, rn, to_ty)); + } + } + + Opcode::Bint => { + // Booleans are stored as all-zeroes (0) or all-ones (-1). We AND + // out the LSB to give a 0 / 1-valued integer result. 
+ let rn = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None); let rd = get_output_reg(ctx, outputs[0]); - let rm = put_input_in_reg(ctx, inputs[0], NarrowValueMode::ZeroExtend64); - ctx.emit(Inst::AluRRR { - alu_op: ALUOp::Sub64, + let output_bits = ty_bits(ctx.output_ty(insn, 0)); + + let (imm_ty, alu_op) = if output_bits > 32 { + (I64, ALUOp::And64) + } else { + (I32, ALUOp::And32) + }; + ctx.emit(Inst::AluRRImmLogic { + alu_op, rd, - rn: zero_reg(), - rm, + rn, + imml: ImmLogic::maybe_from_u64(1, imm_ty).unwrap(), }); } @@ -1369,7 +1418,8 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>( let alu_op = choose_32_64(ty, ALUOp::SubS32, ALUOp::SubS64); let rm = put_input_in_rse_imm12(ctx, inputs[1], narrow_mode); ctx.emit(alu_inst_imm12(alu_op, writable_zero_reg(), rn, rm)); - ctx.emit(Inst::CondSet { cond, rd }); + ctx.emit(Inst::CSet { cond, rd }); + normalize_bool_result(ctx, insn, rd); } else { let rm = put_input_in_reg(ctx, inputs[1], narrow_mode); lower_vector_compare(ctx, rd, rn, rm, ty, cond)?; @@ -1394,7 +1444,8 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>( } _ => panic!("Bad float size"), } - ctx.emit(Inst::CondSet { cond, rd }); + ctx.emit(Inst::CSet { cond, rd }); + normalize_bool_result(ctx, insn, rd); } else { lower_vector_compare(ctx, rd, rn, rm, ty, cond)?; } @@ -1659,6 +1710,7 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>( }); ctx.emit(Inst::CSet { rd, cond: Cond::Ne }); + normalize_bool_result(ctx, insn, rd); } Opcode::Shuffle