aarch64: Fix i128 of/nof implementations (#4403)

@yuyang-ok reported via Zulip that the i128 overflow tests were:
1. inconsistent with the interpreter implementation
2. wrong for some of the test cases

This fixes both the tests and the aarch64 implementation, and also runs
the test suite under the interpreter.
This commit is contained in:
Afonso Bordado
2022-07-07 19:00:58 +01:00
committed by GitHub
parent 6a5fe20956
commit e9727b9d4b
4 changed files with 95 additions and 43 deletions

View File

@@ -1284,29 +1284,60 @@ pub(crate) fn lower_icmp<C: LowerCtx<I = Inst>>(
rn: tmp1.to_reg(), rn: tmp1.to_reg(),
rm: tmp2.to_reg(), rm: tmp2.to_reg(),
}); });
cond
} }
IntCC::Overflow | IntCC::NotOverflow => { IntCC::Overflow | IntCC::NotOverflow => {
// We can do an 128bit add while throwing away the results // cmp lhs_lo, rhs_lo
// and check the overflow flags at the end. // sbcs tmp1, lhs_hi, rhs_hi
// // eor tmp2, lhs_hi, rhs_hi
// adds xzr, lhs_lo, rhs_lo // eor tmp1, lhs_hi, tmp1
// adcs xzr, lhs_hi, rhs_hi // tst tmp2, tmp1
// cset dst, {vs, vc} // cset dst, {lt, ge}
ctx.emit(Inst::AluRRR { ctx.emit(Inst::AluRRR {
alu_op: ALUOp::AddS, alu_op: ALUOp::SubS,
size: OperandSize::Size64, size: OperandSize::Size64,
rd: writable_zero_reg(), rd: writable_zero_reg(),
rn: lhs.regs()[0], rn: lhs.regs()[0],
rm: rhs.regs()[0], rm: rhs.regs()[0],
}); });
ctx.emit(Inst::AluRRR { ctx.emit(Inst::AluRRR {
alu_op: ALUOp::AdcS, alu_op: ALUOp::SbcS,
size: OperandSize::Size64, size: OperandSize::Size64,
rd: writable_zero_reg(), rd: tmp1,
rn: lhs.regs()[1], rn: lhs.regs()[1],
rm: rhs.regs()[1], rm: rhs.regs()[1],
}); });
ctx.emit(Inst::AluRRR {
alu_op: ALUOp::Eor,
size: OperandSize::Size64,
rd: tmp2,
rn: lhs.regs()[1],
rm: rhs.regs()[1],
});
ctx.emit(Inst::AluRRR {
alu_op: ALUOp::Eor,
size: OperandSize::Size64,
rd: tmp1,
rn: lhs.regs()[1],
rm: tmp1.to_reg(),
});
ctx.emit(Inst::AluRRR {
alu_op: ALUOp::AndS,
size: OperandSize::Size64,
rd: writable_zero_reg(),
rn: tmp2.to_reg(),
rm: tmp1.to_reg(),
});
// The final `ands` leaves the overflow result in the N flag and
// clears V, so the outcome has to be read with the lt/ge conditions
// rather than vs/vc; return the matching condition code.
if condcode == IntCC::Overflow {
Cond::Lt
} else {
Cond::Ge
}
} }
_ => { _ => {
// cmp lhs_lo, rhs_lo // cmp lhs_lo, rhs_lo
@@ -1376,9 +1407,9 @@ pub(crate) fn lower_icmp<C: LowerCtx<I = Inst>>(
// Prevent a second materialize_bool_result to be emitted at the end of the function // Prevent a second materialize_bool_result to be emitted at the end of the function
should_materialize = false; should_materialize = false;
cond
} }
} }
cond
} else if ty.is_vector() { } else if ty.is_vector() {
assert_ne!(output, IcmpOutput::CondCode); assert_ne!(output, IcmpOutput::CondCode);
should_materialize = false; should_materialize = false;
@@ -1437,7 +1468,7 @@ pub(crate) fn lower_icmp<C: LowerCtx<I = Inst>>(
// in a register we materialize those flags into a register. Some branches do end up producing // in a register we materialize those flags into a register. Some branches do end up producing
// the result as a register by default, so we ignore those. // the result as a register by default, so we ignore those.
if should_materialize { if should_materialize {
materialize_bool_result(ctx, insn, rd, cond); materialize_bool_result(ctx, insn, rd, out_condcode);
} }
Ok(match output { Ok(match output {

View File

@@ -158,9 +158,12 @@ block0(v0: i128, v1: i128):
} }
; block0: ; block0:
; adds xzr, x0, x2 ; subs xzr, x0, x2
; adcs xzr, x1, x3 ; sbcs x11, x1, x3
; cset x0, vs ; eor x13, x1, x3
; eor x11, x1, x11
; ands xzr, x13, x11
; cset x0, lt
; ret ; ret
function %icmp_nof_i128(i128, i128) -> b1 { function %icmp_nof_i128(i128, i128) -> b1 {
@@ -170,9 +173,12 @@ block0(v0: i128, v1: i128):
} }
; block0: ; block0:
; adds xzr, x0, x2 ; subs xzr, x0, x2
; adcs xzr, x1, x3 ; sbcs x11, x1, x3
; cset x0, vc ; eor x13, x1, x3
; eor x11, x1, x11
; ands xzr, x13, x11
; cset x0, ge
; ret ; ret
function %f(i64, i64) -> i64 { function %f(i64, i64) -> i64 {
@@ -510,9 +516,12 @@ block1:
} }
; block0: ; block0:
; adds xzr, x0, x2 ; subs xzr, x0, x2
; adcs xzr, x1, x3 ; sbcs x9, x1, x3
; b.vs label1 ; b label2 ; eor x11, x1, x3
; eor x9, x1, x9
; ands xzr, x11, x9
; b.lt label1 ; b label2
; block1: ; block1:
; b label3 ; b label3
; block2: ; block2:
@@ -530,9 +539,12 @@ block1:
} }
; block0: ; block0:
; adds xzr, x0, x2 ; subs xzr, x0, x2
; adcs xzr, x1, x3 ; sbcs x9, x1, x3
; b.vc label1 ; b label2 ; eor x11, x1, x3
; eor x9, x1, x9
; ands xzr, x11, x9
; b.ge label1 ; b label2
; block1: ; block1:
; b label3 ; b label3
; block2: ; block2:

View File

@@ -251,12 +251,14 @@ block2:
; run: %i128_bricmp_of(-1, -1) == false ; run: %i128_bricmp_of(-1, -1) == false
; run: %i128_bricmp_of(0x80000000_00000000_00000000_00000000, 0) == false ; run: %i128_bricmp_of(0x80000000_00000000_00000000_00000000, 0) == false
; run: %i128_bricmp_of(0x7FFFFFFF_FFFFFFFF_FFFFFFFF_FFFFFFFF, 0) == false ; run: %i128_bricmp_of(0x7FFFFFFF_FFFFFFFF_FFFFFFFF_FFFFFFFF, 0) == false
; run: %i128_bricmp_of(1, 0x7FFFFFFF_FFFFFFFF_FFFFFFFF_FFFFFFFF) == true ; run: %i128_bricmp_of(1, 0x7FFFFFFF_FFFFFFFF_FFFFFFFF_FFFFFFFF) == false
; run: %i128_bricmp_of(0x7FFFFFFF_FFFFFFFF_FFFFFFFF_FFFFFFFF, 1) == true ; run: %i128_bricmp_of(0x7FFFFFFF_FFFFFFFF_FFFFFFFF_FFFFFFFF, 1) == false
; run: %i128_bricmp_of(0x7FFFFFFF_FFFFFFFF_FFFFFFFF_FFFFFFFF, 0x80000000_00000000_00000000_00000000) == false ; run: %i128_bricmp_of(0x80000000_00000000_00000000_00000000, 1) == true
; run: %i128_bricmp_of(0x80000000_00000000_00000000_00000000, 0x7FFFFFFF_FFFFFFFF_FFFFFFFF_FFFFFFFF) == false ; run: %i128_bricmp_of(1, 0x80000000_00000000_00000000_00000000) == true
; run: %i128_bricmp_of(0x7FFFFFFF_FFFFFFFF_FFFFFFFF_FFFFFFFF, 0x80000000_00000000_00000000_00000000) == true
; run: %i128_bricmp_of(0x80000000_00000000_00000000_00000000, 0x7FFFFFFF_FFFFFFFF_FFFFFFFF_FFFFFFFF) == true
; run: %i128_bricmp_of(0x4FFFFFFF_FFFFFFFF_FFFFFFFF_FFFFFFFF, 0x30000000_00000000_00000000_00000000) == false ; run: %i128_bricmp_of(0x4FFFFFFF_FFFFFFFF_FFFFFFFF_FFFFFFFF, 0x30000000_00000000_00000000_00000000) == false
; run: %i128_bricmp_of(0x4FFFFFFF_FFFFFFFF_FFFFFFFF_FFFFFFFF, 0x30000000_00000000_00000000_00000001) == true ; run: %i128_bricmp_of(0x4FFFFFFF_FFFFFFFF_FFFFFFFF_FFFFFFFF, 0x30000000_00000000_00000000_00000001) == false
function %i128_bricmp_nof(i128, i128) -> b1 { function %i128_bricmp_nof(i128, i128) -> b1 {
block0(v0: i128,v1: i128): block0(v0: i128,v1: i128):
@@ -277,9 +279,11 @@ block2:
; run: %i128_bricmp_nof(-1, -1) == true ; run: %i128_bricmp_nof(-1, -1) == true
; run: %i128_bricmp_nof(0x80000000_00000000_00000000_00000000, 0) == true ; run: %i128_bricmp_nof(0x80000000_00000000_00000000_00000000, 0) == true
; run: %i128_bricmp_nof(0x7FFFFFFF_FFFFFFFF_FFFFFFFF_FFFFFFFF, 0) == true ; run: %i128_bricmp_nof(0x7FFFFFFF_FFFFFFFF_FFFFFFFF_FFFFFFFF, 0) == true
; run: %i128_bricmp_nof(1, 0x7FFFFFFF_FFFFFFFF_FFFFFFFF_FFFFFFFF) == false ; run: %i128_bricmp_nof(1, 0x7FFFFFFF_FFFFFFFF_FFFFFFFF_FFFFFFFF) == true
; run: %i128_bricmp_nof(0x7FFFFFFF_FFFFFFFF_FFFFFFFF_FFFFFFFF, 1) == false ; run: %i128_bricmp_nof(0x7FFFFFFF_FFFFFFFF_FFFFFFFF_FFFFFFFF, 1) == true
; run: %i128_bricmp_nof(0x7FFFFFFF_FFFFFFFF_FFFFFFFF_FFFFFFFF, 0x80000000_00000000_00000000_00000000) == true ; run: %i128_bricmp_nof(0x80000000_00000000_00000000_00000000, 1) == false
; run: %i128_bricmp_nof(0x80000000_00000000_00000000_00000000, 0x7FFFFFFF_FFFFFFFF_FFFFFFFF_FFFFFFFF) == true ; run: %i128_bricmp_nof(1, 0x80000000_00000000_00000000_00000000) == false
; run: %i128_bricmp_nof(0x7FFFFFFF_FFFFFFFF_FFFFFFFF_FFFFFFFF, 0x80000000_00000000_00000000_00000000) == false
; run: %i128_bricmp_nof(0x80000000_00000000_00000000_00000000, 0x7FFFFFFF_FFFFFFFF_FFFFFFFF_FFFFFFFF) == false
; run: %i128_bricmp_nof(0x4FFFFFFF_FFFFFFFF_FFFFFFFF_FFFFFFFF, 0x30000000_00000000_00000000_00000000) == true ; run: %i128_bricmp_nof(0x4FFFFFFF_FFFFFFFF_FFFFFFFF_FFFFFFFF, 0x30000000_00000000_00000000_00000000) == true
; run: %i128_bricmp_nof(0x4FFFFFFF_FFFFFFFF_FFFFFFFF_FFFFFFFF, 0x30000000_00000000_00000000_00000001) == false ; run: %i128_bricmp_nof(0x4FFFFFFF_FFFFFFFF_FFFFFFFF_FFFFFFFF, 0x30000000_00000000_00000000_00000001) == true

View File

@@ -1,3 +1,4 @@
test interpret
test run test run
target aarch64 target aarch64
@@ -12,12 +13,14 @@ block0(v0: i128, v1: i128):
; run: %icmp_of_i128(-1, -1) == false ; run: %icmp_of_i128(-1, -1) == false
; run: %icmp_of_i128(0x80000000_00000000_00000000_00000000, 0) == false ; run: %icmp_of_i128(0x80000000_00000000_00000000_00000000, 0) == false
; run: %icmp_of_i128(0x7FFFFFFF_FFFFFFFF_FFFFFFFF_FFFFFFFF, 0) == false ; run: %icmp_of_i128(0x7FFFFFFF_FFFFFFFF_FFFFFFFF_FFFFFFFF, 0) == false
; run: %icmp_of_i128(1, 0x7FFFFFFF_FFFFFFFF_FFFFFFFF_FFFFFFFF) == true ; run: %icmp_of_i128(1, 0x7FFFFFFF_FFFFFFFF_FFFFFFFF_FFFFFFFF) == false
; run: %icmp_of_i128(0x7FFFFFFF_FFFFFFFF_FFFFFFFF_FFFFFFFF, 1) == true ; run: %icmp_of_i128(0x7FFFFFFF_FFFFFFFF_FFFFFFFF_FFFFFFFF, 1) == false
; run: %icmp_of_i128(0x7FFFFFFF_FFFFFFFF_FFFFFFFF_FFFFFFFF, 0x80000000_00000000_00000000_00000000) == false ; run: %icmp_of_i128(0x80000000_00000000_00000000_00000000, 1) == true
; run: %icmp_of_i128(0x80000000_00000000_00000000_00000000, 0x7FFFFFFF_FFFFFFFF_FFFFFFFF_FFFFFFFF) == false ; run: %icmp_of_i128(1, 0x80000000_00000000_00000000_00000000) == true
; run: %icmp_of_i128(0x7FFFFFFF_FFFFFFFF_FFFFFFFF_FFFFFFFF, 0x80000000_00000000_00000000_00000000) == true
; run: %icmp_of_i128(0x80000000_00000000_00000000_00000000, 0x7FFFFFFF_FFFFFFFF_FFFFFFFF_FFFFFFFF) == true
; run: %icmp_of_i128(0x4FFFFFFF_FFFFFFFF_FFFFFFFF_FFFFFFFF, 0x30000000_00000000_00000000_00000000) == false ; run: %icmp_of_i128(0x4FFFFFFF_FFFFFFFF_FFFFFFFF_FFFFFFFF, 0x30000000_00000000_00000000_00000000) == false
; run: %icmp_of_i128(0x4FFFFFFF_FFFFFFFF_FFFFFFFF_FFFFFFFF, 0x30000000_00000000_00000000_00000001) == true ; run: %icmp_of_i128(0x4FFFFFFF_FFFFFFFF_FFFFFFFF_FFFFFFFF, 0x30000000_00000000_00000000_00000001) == false
function %icmp_nof_i128(i128, i128) -> b1 { function %icmp_nof_i128(i128, i128) -> b1 {
block0(v0: i128, v1: i128): block0(v0: i128, v1: i128):
@@ -30,9 +33,11 @@ block0(v0: i128, v1: i128):
; run: %icmp_nof_i128(-1, -1) == true ; run: %icmp_nof_i128(-1, -1) == true
; run: %icmp_nof_i128(0x80000000_00000000_00000000_00000000, 0) == true ; run: %icmp_nof_i128(0x80000000_00000000_00000000_00000000, 0) == true
; run: %icmp_nof_i128(0x7FFFFFFF_FFFFFFFF_FFFFFFFF_FFFFFFFF, 0) == true ; run: %icmp_nof_i128(0x7FFFFFFF_FFFFFFFF_FFFFFFFF_FFFFFFFF, 0) == true
; run: %icmp_nof_i128(1, 0x7FFFFFFF_FFFFFFFF_FFFFFFFF_FFFFFFFF) == false ; run: %icmp_nof_i128(1, 0x7FFFFFFF_FFFFFFFF_FFFFFFFF_FFFFFFFF) == true
; run: %icmp_nof_i128(0x7FFFFFFF_FFFFFFFF_FFFFFFFF_FFFFFFFF, 1) == false ; run: %icmp_nof_i128(0x7FFFFFFF_FFFFFFFF_FFFFFFFF_FFFFFFFF, 1) == true
; run: %icmp_nof_i128(0x7FFFFFFF_FFFFFFFF_FFFFFFFF_FFFFFFFF, 0x80000000_00000000_00000000_00000000) == true ; run: %icmp_nof_i128(0x80000000_00000000_00000000_00000000, 1) == false
; run: %icmp_nof_i128(0x80000000_00000000_00000000_00000000, 0x7FFFFFFF_FFFFFFFF_FFFFFFFF_FFFFFFFF) == true ; run: %icmp_nof_i128(1, 0x80000000_00000000_00000000_00000000) == false
; run: %icmp_nof_i128(0x7FFFFFFF_FFFFFFFF_FFFFFFFF_FFFFFFFF, 0x80000000_00000000_00000000_00000000) == false
; run: %icmp_nof_i128(0x80000000_00000000_00000000_00000000, 0x7FFFFFFF_FFFFFFFF_FFFFFFFF_FFFFFFFF) == false
; run: %icmp_nof_i128(0x4FFFFFFF_FFFFFFFF_FFFFFFFF_FFFFFFFF, 0x30000000_00000000_00000000_00000000) == true ; run: %icmp_nof_i128(0x4FFFFFFF_FFFFFFFF_FFFFFFFF_FFFFFFFF, 0x30000000_00000000_00000000_00000000) == true
; run: %icmp_nof_i128(0x4FFFFFFF_FFFFFFFF_FFFFFFFF_FFFFFFFF, 0x30000000_00000000_00000000_00000001) == false ; run: %icmp_nof_i128(0x4FFFFFFF_FFFFFFFF_FFFFFFFF_FFFFFFFF, 0x30000000_00000000_00000000_00000001) == true