diff --git a/cranelift/codegen/src/isa/aarch64/inst.isle b/cranelift/codegen/src/isa/aarch64/inst.isle index 0faa1280f9..1f8122188d 100644 --- a/cranelift/codegen/src/isa/aarch64/inst.isle +++ b/cranelift/codegen/src/isa/aarch64/inst.isle @@ -210,6 +210,14 @@ (rd WritableReg) (cond Cond)) + ;; A conditional comparison with a second register. + (CCmp + (size OperandSize) + (rn Reg) + (rm Reg) + (nzcv NZCV) + (cond Cond)) + ;; A conditional comparison with an immediate. (CCmpImm (size OperandSize) diff --git a/cranelift/codegen/src/isa/aarch64/inst/emit.rs b/cranelift/codegen/src/isa/aarch64/inst/emit.rs index 6ec2358fe4..40e4d289ec 100644 --- a/cranelift/codegen/src/isa/aarch64/inst/emit.rs +++ b/cranelift/codegen/src/isa/aarch64/inst/emit.rs @@ -353,6 +353,15 @@ fn enc_fcsel(rd: Writable, rn: Reg, rm: Reg, cond: Cond, size: ScalarSize) | (cond.bits() << 12) } +fn enc_ccmp(size: OperandSize, rn: Reg, rm: Reg, nzcv: NZCV, cond: Cond) -> u32 { + 0b0_1_1_11010010_00000_0000_00_00000_0_0000 + | size.sf_bit() << 31 + | machreg_to_gpr(rm) << 16 + | cond.bits() << 12 + | machreg_to_gpr(rn) << 5 + | nzcv.bits() +} + fn enc_ccmp_imm(size: OperandSize, rn: Reg, imm: UImm5, nzcv: NZCV, cond: Cond) -> u32 { 0b0_1_1_11010010_00000_0000_10_00000_0_0000 | size.sf_bit() << 31 @@ -1367,6 +1376,17 @@ impl MachInstEmit for Inst { let rd = allocs.next_writable(rd); sink.put4(enc_csel(rd, zero_reg(), zero_reg(), cond.invert(), 1, 0)); } + &Inst::CCmp { + size, + rn, + rm, + nzcv, + cond, + } => { + let rn = allocs.next(rn); + let rm = allocs.next(rm); + sink.put4(enc_ccmp(size, rn, rm, nzcv, cond)); + } &Inst::CCmpImm { size, rn, diff --git a/cranelift/codegen/src/isa/aarch64/inst/emit_tests.rs b/cranelift/codegen/src/isa/aarch64/inst/emit_tests.rs index 2ecb20d4df..daa33fed46 100644 --- a/cranelift/codegen/src/isa/aarch64/inst/emit_tests.rs +++ b/cranelift/codegen/src/isa/aarch64/inst/emit_tests.rs @@ -2182,6 +2182,28 @@ fn test_aarch64_binemit() { "F0739FDA", "csetm x16, vs", )); + 
insns.push(( + Inst::CCmp { + size: OperandSize::Size64, + rn: xreg(22), + rm: xreg(1), + nzcv: NZCV::new(false, false, true, true), + cond: Cond::Eq, + }, + "C30241FA", + "ccmp x22, x1, #nzCV, eq", + )); + insns.push(( + Inst::CCmp { + size: OperandSize::Size32, + rn: xreg(3), + rm: xreg(28), + nzcv: NZCV::new(true, true, true, true), + cond: Cond::Gt, + }, + "6FC05C7A", + "ccmp w3, w28, #NZCV, gt", + )); insns.push(( Inst::CCmpImm { size: OperandSize::Size64, diff --git a/cranelift/codegen/src/isa/aarch64/inst/mod.rs b/cranelift/codegen/src/isa/aarch64/inst/mod.rs index 241d0f79f3..96a5c0b37b 100644 --- a/cranelift/codegen/src/isa/aarch64/inst/mod.rs +++ b/cranelift/codegen/src/isa/aarch64/inst/mod.rs @@ -674,6 +674,10 @@ fn aarch64_get_operands VReg>(inst: &Inst, collector: &mut Operan &Inst::CSet { rd, .. } | &Inst::CSetm { rd, .. } => { collector.reg_def(rd); } + &Inst::CCmp { rn, rm, .. } => { + collector.reg_use(rn); + collector.reg_use(rm); + } &Inst::CCmpImm { rn, .. } => { collector.reg_use(rn); } @@ -1531,6 +1535,19 @@ impl Inst { let cond = cond.pretty_print(0, allocs); format!("csetm {}, {}", rd, cond) } + &Inst::CCmp { + size, + rn, + rm, + nzcv, + cond, + } => { + let rn = pretty_print_ireg(rn, size, allocs); + let rm = pretty_print_ireg(rm, size, allocs); + let nzcv = nzcv.pretty_print(0, allocs); + let cond = cond.pretty_print(0, allocs); + format!("ccmp {}, {}, {}, {}", rn, rm, nzcv, cond) + } &Inst::CCmpImm { size, rn, diff --git a/cranelift/codegen/src/isa/aarch64/lower.rs b/cranelift/codegen/src/isa/aarch64/lower.rs index 72194f7e89..3854b786bd 100644 --- a/cranelift/codegen/src/isa/aarch64/lower.rs +++ b/cranelift/codegen/src/isa/aarch64/lower.rs @@ -1229,31 +1229,23 @@ pub(crate) fn lower_icmp( match condcode { IntCC::Equal | IntCC::NotEqual => { - // eor tmp1, lhs_lo, rhs_lo - // eor tmp2, lhs_hi, rhs_hi - // adds xzr, tmp1, tmp2 - // cset dst, {eq, ne} + // cmp lhs_lo, rhs_lo + // ccmp lhs_hi, rhs_hi, #0, eq + // cset dst, {eq, ne} 
ctx.emit(Inst::AluRRR { - alu_op: ALUOp::Eor, + alu_op: ALUOp::SubS, size: OperandSize::Size64, - rd: tmp1, + rd: writable_zero_reg(), rn: lhs.regs()[0], rm: rhs.regs()[0], }); - ctx.emit(Inst::AluRRR { - alu_op: ALUOp::Eor, + ctx.emit(Inst::CCmp { size: OperandSize::Size64, - rd: tmp2, rn: lhs.regs()[1], rm: rhs.regs()[1], - }); - ctx.emit(Inst::AluRRR { - alu_op: ALUOp::AddS, - size: OperandSize::Size64, - rd: writable_zero_reg(), - rn: tmp1.to_reg(), - rm: tmp2.to_reg(), + nzcv: NZCV::new(false, false, false, false), + cond: Cond::Eq, }); cond } diff --git a/cranelift/filetests/filetests/isa/aarch64/condbr.clif b/cranelift/filetests/filetests/isa/aarch64/condbr.clif index c634685c71..87498c0ff7 100644 --- a/cranelift/filetests/filetests/isa/aarch64/condbr.clif +++ b/cranelift/filetests/filetests/isa/aarch64/condbr.clif @@ -20,9 +20,8 @@ block0(v0: i128, v1: i128): } ; block0: -; eor x10, x0, x2 -; eor x12, x1, x3 -; adds xzr, x10, x12 +; subs xzr, x0, x2 +; ccmp x1, x3, #nzcv, eq ; cset x0, eq ; ret @@ -33,9 +32,8 @@ block0(v0: i128, v1: i128): } ; block0: -; eor x10, x0, x2 -; eor x12, x1, x3 -; adds xzr, x10, x12 +; subs xzr, x0, x2 +; ccmp x1, x3, #nzcv, eq ; cset x0, ne ; ret @@ -278,9 +276,8 @@ block1: } ; block0: -; eor x8, x0, x2 -; eor x10, x1, x3 -; adds xzr, x8, x10 +; subs xzr, x0, x2 +; ccmp x1, x3, #nzcv, eq ; b.eq label1 ; b label2 ; block1: ; b label3 @@ -299,9 +296,8 @@ block1: } ; block0: -; eor x8, x0, x2 -; eor x10, x1, x3 -; adds xzr, x8, x10 +; subs xzr, x0, x2 +; ccmp x1, x3, #nzcv, eq ; b.ne label1 ; b label2 ; block1: ; b label3 diff --git a/cranelift/filetests/filetests/runtests/i128-icmp.clif b/cranelift/filetests/filetests/runtests/i128-icmp.clif index ecfc6bc835..2c48f84985 100644 --- a/cranelift/filetests/filetests/runtests/i128-icmp.clif +++ b/cranelift/filetests/filetests/runtests/i128-icmp.clif @@ -20,6 +20,9 @@ block0(v0: i128, v1: i128): ; run: %icmp_eq_i128(0xFFFFFFFF_FFFFFFFF_FFFFFFFF_FFFFFFFF, 
0x00000000_00000001_00000000_00000001) == false ; run: %icmp_eq_i128(0x00000000_00000001_FFFFFFFF_FFFFFFFF, 0x00000000_00000001_00000000_00000001) == false +; This is a regression test for aarch64 (36893488147419103231 is 2^65 - 1: low half all ones, high half 1, which the old eor/eor/adds lowering wrapped to zero and reported as equal), see: https://github.com/bytecodealliance/wasmtime/issues/4705 +; run: %icmp_eq_i128(36893488147419103231, 0) == false + function %icmp_ne_i128(i128, i128) -> b1 { block0(v0: i128, v1: i128):