From e9727b9d4b7679e34ee50f66b21e59a32eeb18de Mon Sep 17 00:00:00 2001 From: Afonso Bordado Date: Thu, 7 Jul 2022 19:00:58 +0100 Subject: [PATCH] aarch64: Fix i128 `of`/`nof` implementations (#4403) @yuyang-ok reported via zulip that i128 overflow tests were: 1. different from the interpreter implementation 2. wrong on some of the test cases This fixes both the tests and the aarch64 implementation and adds the interpreter to the testsuite. --- cranelift/codegen/src/isa/aarch64/lower.rs | 53 +++++++++++++++---- .../filetests/isa/aarch64/condbr.clif | 36 ++++++++----- .../filetests/runtests/i128-bricmp.clif | 24 +++++---- .../runtests/i128-icmp-overflow.clif | 25 +++++---- 4 files changed, 95 insertions(+), 43 deletions(-) diff --git a/cranelift/codegen/src/isa/aarch64/lower.rs b/cranelift/codegen/src/isa/aarch64/lower.rs index a7dfcc0a5c..5999fbe875 100644 --- a/cranelift/codegen/src/isa/aarch64/lower.rs +++ b/cranelift/codegen/src/isa/aarch64/lower.rs @@ -1284,29 +1284,60 @@ pub(crate) fn lower_icmp>( rn: tmp1.to_reg(), rm: tmp2.to_reg(), }); + cond } IntCC::Overflow | IntCC::NotOverflow => { - // We can do an 128bit add while throwing away the results - // and check the overflow flags at the end. - // - // adds xzr, lhs_lo, rhs_lo - // adcs xzr, lhs_hi, rhs_hi - // cset dst, {vs, vc} + // cmp lhs_lo, rhs_lo + // sbcs tmp1, lhs_hi, rhs_hi + // eor tmp2, lhs_hi, rhs_hi + // eor tmp1, lhs_hi, tmp1 + // tst tmp2, tmp1 + // cset dst, {lt, ge} ctx.emit(Inst::AluRRR { - alu_op: ALUOp::AddS, + alu_op: ALUOp::SubS, size: OperandSize::Size64, rd: writable_zero_reg(), rn: lhs.regs()[0], rm: rhs.regs()[0], }); ctx.emit(Inst::AluRRR { - alu_op: ALUOp::AdcS, + alu_op: ALUOp::SbcS, size: OperandSize::Size64, - rd: writable_zero_reg(), + rd: tmp1, rn: lhs.regs()[1], rm: rhs.regs()[1], }); + ctx.emit(Inst::AluRRR { + alu_op: ALUOp::Eor, + size: OperandSize::Size64, + rd: tmp2, + rn: lhs.regs()[1], + rm: rhs.regs()[1], + }); + ctx.emit(Inst::AluRRR { + alu_op: ALUOp::Eor, + size: OperandSize::Size64, + rd: tmp1, + rn: lhs.regs()[1], + rm: tmp1.to_reg(), + }); + ctx.emit(Inst::AluRRR { + alu_op: ALUOp::AndS, + size: OperandSize::Size64, + rd: writable_zero_reg(), + rn: tmp2.to_reg(), + rm: tmp1.to_reg(), + }); + + // This instruction sequence sets the condition codes + // on the lt and ge flags instead of the vs/vc so we + // need to signal that + if condcode == IntCC::Overflow { + Cond::Lt + } else { + Cond::Ge + } } _ => { // cmp lhs_lo, rhs_lo @@ -1376,9 +1407,9 @@ pub(crate) fn lower_icmp>( // Prevent a second materialize_bool_result to be emitted at the end of the function should_materialize = false; + cond } } - cond } else if ty.is_vector() { assert_ne!(output, IcmpOutput::CondCode); should_materialize = false; @@ -1437,7 +1468,7 @@ pub(crate) fn lower_icmp>( // in a register we materialize those flags into a register. Some branches do end up producing // the result as a register by default, so we ignore those. if should_materialize { - materialize_bool_result(ctx, insn, rd, cond); + materialize_bool_result(ctx, insn, rd, out_condcode); } Ok(match output { diff --git a/cranelift/filetests/filetests/isa/aarch64/condbr.clif b/cranelift/filetests/filetests/isa/aarch64/condbr.clif index 9195757667..c634685c71 100644 --- a/cranelift/filetests/filetests/isa/aarch64/condbr.clif +++ b/cranelift/filetests/filetests/isa/aarch64/condbr.clif @@ -158,9 +158,12 @@ block0(v0: i128, v1: i128): } ; block0: -; adds xzr, x0, x2 -; adcs xzr, x1, x3 -; cset x0, vs +; subs xzr, x0, x2 +; sbcs x11, x1, x3 +; eor x13, x1, x3 +; eor x11, x1, x11 +; ands xzr, x13, x11 +; cset x0, lt ; ret function %icmp_nof_i128(i128, i128) -> b1 { @@ -170,9 +173,12 @@ block0(v0: i128, v1: i128): } ; block0: -; adds xzr, x0, x2 -; adcs xzr, x1, x3 -; cset x0, vc +; subs xzr, x0, x2 +; sbcs x11, x1, x3 +; eor x13, x1, x3 +; eor x11, x1, x11 +; ands xzr, x13, x11 +; cset x0, ge ; ret function %f(i64, i64) -> i64 { @@ -510,9 +516,12 @@ block1: } ; block0: -; adds xzr, x0, x2 -; adcs xzr, x1, x3 -; b.vs label1 ; b label2 +; subs xzr, x0, x2 +; sbcs x9, x1, x3 +; eor x11, x1, x3 +; eor x9, x1, x9 +; ands xzr, x11, x9 +; b.lt label1 ; b label2 ; block1: ; b label3 ; block2: @@ -530,9 +539,12 @@ block1: } ; block0: -; adds xzr, x0, x2 -; adcs xzr, x1, x3 -; b.vc label1 ; b label2 +; subs xzr, x0, x2 +; sbcs x9, x1, x3 +; eor x11, x1, x3 +; eor x9, x1, x9 +; ands xzr, x11, x9 +; b.ge label1 ; b label2 ; block1: ; b label3 ; block2: diff --git a/cranelift/filetests/filetests/runtests/i128-bricmp.clif b/cranelift/filetests/filetests/runtests/i128-bricmp.clif index 29f340fbdb..23a1a17e02 100644 --- a/cranelift/filetests/filetests/runtests/i128-bricmp.clif +++ b/cranelift/filetests/filetests/runtests/i128-bricmp.clif @@ -251,12 +251,14 @@ block2: ; run: %i128_bricmp_of(-1, -1) == false ; run: %i128_bricmp_of(0x80000000_00000000_00000000_00000000, 0) == false ; run: %i128_bricmp_of(0x7FFFFFFF_FFFFFFFF_FFFFFFFF_FFFFFFFF, 0) == false -; run: %i128_bricmp_of(1, 0x7FFFFFFF_FFFFFFFF_FFFFFFFF_FFFFFFFF) == true -; run: %i128_bricmp_of(0x7FFFFFFF_FFFFFFFF_FFFFFFFF_FFFFFFFF, 1) == true -; run: %i128_bricmp_of(0x7FFFFFFF_FFFFFFFF_FFFFFFFF_FFFFFFFF, 0x80000000_00000000_00000000_00000000) == false -; run: %i128_bricmp_of(0x80000000_00000000_00000000_00000000, 0x7FFFFFFF_FFFFFFFF_FFFFFFFF_FFFFFFFF) == false +; run: %i128_bricmp_of(1, 0x7FFFFFFF_FFFFFFFF_FFFFFFFF_FFFFFFFF) == false +; run: %i128_bricmp_of(0x7FFFFFFF_FFFFFFFF_FFFFFFFF_FFFFFFFF, 1) == false +; run: %i128_bricmp_of(0x80000000_00000000_00000000_00000000, 1) == true +; run: %i128_bricmp_of(1, 0x80000000_00000000_00000000_00000000) == true +; run: %i128_bricmp_of(0x7FFFFFFF_FFFFFFFF_FFFFFFFF_FFFFFFFF, 0x80000000_00000000_00000000_00000000) == true +; run: %i128_bricmp_of(0x80000000_00000000_00000000_00000000, 0x7FFFFFFF_FFFFFFFF_FFFFFFFF_FFFFFFFF) == true ; run: %i128_bricmp_of(0x4FFFFFFF_FFFFFFFF_FFFFFFFF_FFFFFFFF, 0x30000000_00000000_00000000_00000000) == false -; run: %i128_bricmp_of(0x4FFFFFFF_FFFFFFFF_FFFFFFFF_FFFFFFFF, 0x30000000_00000000_00000000_00000001) == true +; run: %i128_bricmp_of(0x4FFFFFFF_FFFFFFFF_FFFFFFFF_FFFFFFFF, 0x30000000_00000000_00000000_00000001) == false function %i128_bricmp_nof(i128, i128) -> b1 { block0(v0: i128,v1: i128): @@ -277,9 +279,11 @@ block2: ; run: %i128_bricmp_nof(-1, -1) == true ; run: %i128_bricmp_nof(0x80000000_00000000_00000000_00000000, 0) == true ; run: %i128_bricmp_nof(0x7FFFFFFF_FFFFFFFF_FFFFFFFF_FFFFFFFF, 0) == true -; run: %i128_bricmp_nof(1, 0x7FFFFFFF_FFFFFFFF_FFFFFFFF_FFFFFFFF) == false -; run: %i128_bricmp_nof(0x7FFFFFFF_FFFFFFFF_FFFFFFFF_FFFFFFFF, 1) == false -; run: %i128_bricmp_nof(0x7FFFFFFF_FFFFFFFF_FFFFFFFF_FFFFFFFF, 0x80000000_00000000_00000000_00000000) == true -; run: %i128_bricmp_nof(0x80000000_00000000_00000000_00000000, 0x7FFFFFFF_FFFFFFFF_FFFFFFFF_FFFFFFFF) == true +; run: %i128_bricmp_nof(1, 0x7FFFFFFF_FFFFFFFF_FFFFFFFF_FFFFFFFF) == true +; run: %i128_bricmp_nof(0x7FFFFFFF_FFFFFFFF_FFFFFFFF_FFFFFFFF, 1) == true +; run: %i128_bricmp_nof(0x80000000_00000000_00000000_00000000, 1) == false +; run: %i128_bricmp_nof(1, 0x80000000_00000000_00000000_00000000) == false +; run: %i128_bricmp_nof(0x7FFFFFFF_FFFFFFFF_FFFFFFFF_FFFFFFFF, 0x80000000_00000000_00000000_00000000) == false +; run: %i128_bricmp_nof(0x80000000_00000000_00000000_00000000, 0x7FFFFFFF_FFFFFFFF_FFFFFFFF_FFFFFFFF) == false ; run: %i128_bricmp_nof(0x4FFFFFFF_FFFFFFFF_FFFFFFFF_FFFFFFFF, 0x30000000_00000000_00000000_00000000) == true -; run: %i128_bricmp_nof(0x4FFFFFFF_FFFFFFFF_FFFFFFFF_FFFFFFFF, 0x30000000_00000000_00000000_00000001) == false +; run: %i128_bricmp_nof(0x4FFFFFFF_FFFFFFFF_FFFFFFFF_FFFFFFFF, 0x30000000_00000000_00000000_00000001) == true diff --git a/cranelift/filetests/filetests/runtests/i128-icmp-overflow.clif b/cranelift/filetests/filetests/runtests/i128-icmp-overflow.clif index abbcd7d2f8..051102b854 100644 --- a/cranelift/filetests/filetests/runtests/i128-icmp-overflow.clif +++ b/cranelift/filetests/filetests/runtests/i128-icmp-overflow.clif @@ -1,3 +1,4 @@ +test interpret test run target aarch64 @@ -12,12 +13,14 @@ block0(v0: i128, v1: i128): ; run: %icmp_of_i128(-1, -1) == false ; run: %icmp_of_i128(0x80000000_00000000_00000000_00000000, 0) == false ; run: %icmp_of_i128(0x7FFFFFFF_FFFFFFFF_FFFFFFFF_FFFFFFFF, 0) == false -; run: %icmp_of_i128(1, 0x7FFFFFFF_FFFFFFFF_FFFFFFFF_FFFFFFFF) == true -; run: %icmp_of_i128(0x7FFFFFFF_FFFFFFFF_FFFFFFFF_FFFFFFFF, 1) == true -; run: %icmp_of_i128(0x7FFFFFFF_FFFFFFFF_FFFFFFFF_FFFFFFFF, 0x80000000_00000000_00000000_00000000) == false -; run: %icmp_of_i128(0x80000000_00000000_00000000_00000000, 0x7FFFFFFF_FFFFFFFF_FFFFFFFF_FFFFFFFF) == false +; run: %icmp_of_i128(1, 0x7FFFFFFF_FFFFFFFF_FFFFFFFF_FFFFFFFF) == false +; run: %icmp_of_i128(0x7FFFFFFF_FFFFFFFF_FFFFFFFF_FFFFFFFF, 1) == false +; run: %icmp_of_i128(0x80000000_00000000_00000000_00000000, 1) == true +; run: %icmp_of_i128(1, 0x80000000_00000000_00000000_00000000) == true +; run: %icmp_of_i128(0x7FFFFFFF_FFFFFFFF_FFFFFFFF_FFFFFFFF, 0x80000000_00000000_00000000_00000000) == true +; run: %icmp_of_i128(0x80000000_00000000_00000000_00000000, 0x7FFFFFFF_FFFFFFFF_FFFFFFFF_FFFFFFFF) == true ; run: %icmp_of_i128(0x4FFFFFFF_FFFFFFFF_FFFFFFFF_FFFFFFFF, 0x30000000_00000000_00000000_00000000) == false -; run: %icmp_of_i128(0x4FFFFFFF_FFFFFFFF_FFFFFFFF_FFFFFFFF, 0x30000000_00000000_00000000_00000001) == true +; run: %icmp_of_i128(0x4FFFFFFF_FFFFFFFF_FFFFFFFF_FFFFFFFF, 0x30000000_00000000_00000000_00000001) == false function %icmp_nof_i128(i128, i128) -> b1 { block0(v0: i128, v1: i128): @@ -30,9 +33,11 @@ block0(v0: i128, v1: i128): ; run: %icmp_nof_i128(-1, -1) == true ; run: %icmp_nof_i128(0x80000000_00000000_00000000_00000000, 0) == true ; run: %icmp_nof_i128(0x7FFFFFFF_FFFFFFFF_FFFFFFFF_FFFFFFFF, 0) == true -; run: %icmp_nof_i128(1, 0x7FFFFFFF_FFFFFFFF_FFFFFFFF_FFFFFFFF) == false -; run: %icmp_nof_i128(0x7FFFFFFF_FFFFFFFF_FFFFFFFF_FFFFFFFF, 1) == false -; run: %icmp_nof_i128(0x7FFFFFFF_FFFFFFFF_FFFFFFFF_FFFFFFFF, 0x80000000_00000000_00000000_00000000) == true -; run: %icmp_nof_i128(0x80000000_00000000_00000000_00000000, 0x7FFFFFFF_FFFFFFFF_FFFFFFFF_FFFFFFFF) == true +; run: %icmp_nof_i128(1, 0x7FFFFFFF_FFFFFFFF_FFFFFFFF_FFFFFFFF) == true +; run: %icmp_nof_i128(0x7FFFFFFF_FFFFFFFF_FFFFFFFF_FFFFFFFF, 1) == true +; run: %icmp_nof_i128(0x80000000_00000000_00000000_00000000, 1) == false +; run: %icmp_nof_i128(1, 0x80000000_00000000_00000000_00000000) == false +; run: %icmp_nof_i128(0x7FFFFFFF_FFFFFFFF_FFFFFFFF_FFFFFFFF, 0x80000000_00000000_00000000_00000000) == false +; run: %icmp_nof_i128(0x80000000_00000000_00000000_00000000, 0x7FFFFFFF_FFFFFFFF_FFFFFFFF_FFFFFFFF) == false ; run: %icmp_nof_i128(0x4FFFFFFF_FFFFFFFF_FFFFFFFF_FFFFFFFF, 0x30000000_00000000_00000000_00000000) == true -; run: %icmp_nof_i128(0x4FFFFFFF_FFFFFFFF_FFFFFFFF_FFFFFFFF, 0x30000000_00000000_00000000_00000001) == false +; run: %icmp_nof_i128(0x4FFFFFFF_FFFFFFFF_FFFFFFFF_FFFFFFFF, 0x30000000_00000000_00000000_00000001) == true