aarch64: Fix i128 of/nof implementations (#4403)

@yuyang-ok reported via zulip that i128 overflow tests were:
1. different from the interpreter implementation
2. wrong on some of the test cases

This fixes both the tests and the aarch64 implementation and adds the
interpreter to the testsuite.
This commit is contained in:
Afonso Bordado
2022-07-07 19:00:58 +01:00
committed by GitHub
parent 6a5fe20956
commit e9727b9d4b
4 changed files with 95 additions and 43 deletions

View File

@@ -1284,29 +1284,60 @@ pub(crate) fn lower_icmp<C: LowerCtx<I = Inst>>(
rn: tmp1.to_reg(),
rm: tmp2.to_reg(),
});
cond
}
IntCC::Overflow | IntCC::NotOverflow => {
// We can do an 128bit add while throwing away the results
// and check the overflow flags at the end.
//
// adds xzr, lhs_lo, rhs_lo
// adcs xzr, lhs_hi, rhs_hi
// cset dst, {vs, vc}
// cmp lhs_lo, rhs_lo
// sbcs tmp1, lhs_hi, rhs_hi
// eor tmp2, lhs_hi, rhs_hi
// eor tmp1, lhs_hi, tmp1
// tst tmp2, tmp1
// cset dst, {lt, ge}
ctx.emit(Inst::AluRRR {
alu_op: ALUOp::AddS,
alu_op: ALUOp::SubS,
size: OperandSize::Size64,
rd: writable_zero_reg(),
rn: lhs.regs()[0],
rm: rhs.regs()[0],
});
ctx.emit(Inst::AluRRR {
alu_op: ALUOp::AdcS,
alu_op: ALUOp::SbcS,
size: OperandSize::Size64,
rd: writable_zero_reg(),
rd: tmp1,
rn: lhs.regs()[1],
rm: rhs.regs()[1],
});
ctx.emit(Inst::AluRRR {
alu_op: ALUOp::Eor,
size: OperandSize::Size64,
rd: tmp2,
rn: lhs.regs()[1],
rm: rhs.regs()[1],
});
ctx.emit(Inst::AluRRR {
alu_op: ALUOp::Eor,
size: OperandSize::Size64,
rd: tmp1,
rn: lhs.regs()[1],
rm: tmp1.to_reg(),
});
ctx.emit(Inst::AluRRR {
alu_op: ALUOp::AndS,
size: OperandSize::Size64,
rd: writable_zero_reg(),
rn: tmp2.to_reg(),
rm: tmp1.to_reg(),
});
// This instruction sequence sets the condition codes
// on the lt and ge flags instead of the vs/vc so we
// need to signal that
if condcode == IntCC::Overflow {
Cond::Lt
} else {
Cond::Ge
}
}
_ => {
// cmp lhs_lo, rhs_lo
@@ -1376,9 +1407,9 @@ pub(crate) fn lower_icmp<C: LowerCtx<I = Inst>>(
// Prevent a second materialize_bool_result to be emitted at the end of the function
should_materialize = false;
}
}
cond
}
}
} else if ty.is_vector() {
assert_ne!(output, IcmpOutput::CondCode);
should_materialize = false;
@@ -1437,7 +1468,7 @@ pub(crate) fn lower_icmp<C: LowerCtx<I = Inst>>(
// in a register we materialize those flags into a register. Some branches do end up producing
// the result as a register by default, so we ignore those.
if should_materialize {
materialize_bool_result(ctx, insn, rd, cond);
materialize_bool_result(ctx, insn, rd, out_condcode);
}
Ok(match output {

View File

@@ -158,9 +158,12 @@ block0(v0: i128, v1: i128):
}
; block0:
; adds xzr, x0, x2
; adcs xzr, x1, x3
; cset x0, vs
; subs xzr, x0, x2
; sbcs x11, x1, x3
; eor x13, x1, x3
; eor x11, x1, x11
; ands xzr, x13, x11
; cset x0, lt
; ret
function %icmp_nof_i128(i128, i128) -> b1 {
@@ -170,9 +173,12 @@ block0(v0: i128, v1: i128):
}
; block0:
; adds xzr, x0, x2
; adcs xzr, x1, x3
; cset x0, vc
; subs xzr, x0, x2
; sbcs x11, x1, x3
; eor x13, x1, x3
; eor x11, x1, x11
; ands xzr, x13, x11
; cset x0, ge
; ret
function %f(i64, i64) -> i64 {
@@ -510,9 +516,12 @@ block1:
}
; block0:
; adds xzr, x0, x2
; adcs xzr, x1, x3
; b.vs label1 ; b label2
; subs xzr, x0, x2
; sbcs x9, x1, x3
; eor x11, x1, x3
; eor x9, x1, x9
; ands xzr, x11, x9
; b.lt label1 ; b label2
; block1:
; b label3
; block2:
@@ -530,9 +539,12 @@ block1:
}
; block0:
; adds xzr, x0, x2
; adcs xzr, x1, x3
; b.vc label1 ; b label2
; subs xzr, x0, x2
; sbcs x9, x1, x3
; eor x11, x1, x3
; eor x9, x1, x9
; ands xzr, x11, x9
; b.ge label1 ; b label2
; block1:
; b label3
; block2:

View File

@@ -251,12 +251,14 @@ block2:
; run: %i128_bricmp_of(-1, -1) == false
; run: %i128_bricmp_of(0x80000000_00000000_00000000_00000000, 0) == false
; run: %i128_bricmp_of(0x7FFFFFFF_FFFFFFFF_FFFFFFFF_FFFFFFFF, 0) == false
; run: %i128_bricmp_of(1, 0x7FFFFFFF_FFFFFFFF_FFFFFFFF_FFFFFFFF) == true
; run: %i128_bricmp_of(0x7FFFFFFF_FFFFFFFF_FFFFFFFF_FFFFFFFF, 1) == true
; run: %i128_bricmp_of(0x7FFFFFFF_FFFFFFFF_FFFFFFFF_FFFFFFFF, 0x80000000_00000000_00000000_00000000) == false
; run: %i128_bricmp_of(0x80000000_00000000_00000000_00000000, 0x7FFFFFFF_FFFFFFFF_FFFFFFFF_FFFFFFFF) == false
; run: %i128_bricmp_of(1, 0x7FFFFFFF_FFFFFFFF_FFFFFFFF_FFFFFFFF) == false
; run: %i128_bricmp_of(0x7FFFFFFF_FFFFFFFF_FFFFFFFF_FFFFFFFF, 1) == false
; run: %i128_bricmp_of(0x80000000_00000000_00000000_00000000, 1) == true
; run: %i128_bricmp_of(1, 0x80000000_00000000_00000000_00000000) == true
; run: %i128_bricmp_of(0x7FFFFFFF_FFFFFFFF_FFFFFFFF_FFFFFFFF, 0x80000000_00000000_00000000_00000000) == true
; run: %i128_bricmp_of(0x80000000_00000000_00000000_00000000, 0x7FFFFFFF_FFFFFFFF_FFFFFFFF_FFFFFFFF) == true
; run: %i128_bricmp_of(0x4FFFFFFF_FFFFFFFF_FFFFFFFF_FFFFFFFF, 0x30000000_00000000_00000000_00000000) == false
; run: %i128_bricmp_of(0x4FFFFFFF_FFFFFFFF_FFFFFFFF_FFFFFFFF, 0x30000000_00000000_00000000_00000001) == true
; run: %i128_bricmp_of(0x4FFFFFFF_FFFFFFFF_FFFFFFFF_FFFFFFFF, 0x30000000_00000000_00000000_00000001) == false
function %i128_bricmp_nof(i128, i128) -> b1 {
block0(v0: i128,v1: i128):
@@ -277,9 +279,11 @@ block2:
; run: %i128_bricmp_nof(-1, -1) == true
; run: %i128_bricmp_nof(0x80000000_00000000_00000000_00000000, 0) == true
; run: %i128_bricmp_nof(0x7FFFFFFF_FFFFFFFF_FFFFFFFF_FFFFFFFF, 0) == true
; run: %i128_bricmp_nof(1, 0x7FFFFFFF_FFFFFFFF_FFFFFFFF_FFFFFFFF) == false
; run: %i128_bricmp_nof(0x7FFFFFFF_FFFFFFFF_FFFFFFFF_FFFFFFFF, 1) == false
; run: %i128_bricmp_nof(0x7FFFFFFF_FFFFFFFF_FFFFFFFF_FFFFFFFF, 0x80000000_00000000_00000000_00000000) == true
; run: %i128_bricmp_nof(0x80000000_00000000_00000000_00000000, 0x7FFFFFFF_FFFFFFFF_FFFFFFFF_FFFFFFFF) == true
; run: %i128_bricmp_nof(1, 0x7FFFFFFF_FFFFFFFF_FFFFFFFF_FFFFFFFF) == true
; run: %i128_bricmp_nof(0x7FFFFFFF_FFFFFFFF_FFFFFFFF_FFFFFFFF, 1) == true
; run: %i128_bricmp_nof(0x80000000_00000000_00000000_00000000, 1) == false
; run: %i128_bricmp_nof(1, 0x80000000_00000000_00000000_00000000) == false
; run: %i128_bricmp_nof(0x7FFFFFFF_FFFFFFFF_FFFFFFFF_FFFFFFFF, 0x80000000_00000000_00000000_00000000) == false
; run: %i128_bricmp_nof(0x80000000_00000000_00000000_00000000, 0x7FFFFFFF_FFFFFFFF_FFFFFFFF_FFFFFFFF) == false
; run: %i128_bricmp_nof(0x4FFFFFFF_FFFFFFFF_FFFFFFFF_FFFFFFFF, 0x30000000_00000000_00000000_00000000) == true
; run: %i128_bricmp_nof(0x4FFFFFFF_FFFFFFFF_FFFFFFFF_FFFFFFFF, 0x30000000_00000000_00000000_00000001) == false
; run: %i128_bricmp_nof(0x4FFFFFFF_FFFFFFFF_FFFFFFFF_FFFFFFFF, 0x30000000_00000000_00000000_00000001) == true

View File

@@ -1,3 +1,4 @@
test interpret
test run
target aarch64
@@ -12,12 +13,14 @@ block0(v0: i128, v1: i128):
; run: %icmp_of_i128(-1, -1) == false
; run: %icmp_of_i128(0x80000000_00000000_00000000_00000000, 0) == false
; run: %icmp_of_i128(0x7FFFFFFF_FFFFFFFF_FFFFFFFF_FFFFFFFF, 0) == false
; run: %icmp_of_i128(1, 0x7FFFFFFF_FFFFFFFF_FFFFFFFF_FFFFFFFF) == true
; run: %icmp_of_i128(0x7FFFFFFF_FFFFFFFF_FFFFFFFF_FFFFFFFF, 1) == true
; run: %icmp_of_i128(0x7FFFFFFF_FFFFFFFF_FFFFFFFF_FFFFFFFF, 0x80000000_00000000_00000000_00000000) == false
; run: %icmp_of_i128(0x80000000_00000000_00000000_00000000, 0x7FFFFFFF_FFFFFFFF_FFFFFFFF_FFFFFFFF) == false
; run: %icmp_of_i128(1, 0x7FFFFFFF_FFFFFFFF_FFFFFFFF_FFFFFFFF) == false
; run: %icmp_of_i128(0x7FFFFFFF_FFFFFFFF_FFFFFFFF_FFFFFFFF, 1) == false
; run: %icmp_of_i128(0x80000000_00000000_00000000_00000000, 1) == true
; run: %icmp_of_i128(1, 0x80000000_00000000_00000000_00000000) == true
; run: %icmp_of_i128(0x7FFFFFFF_FFFFFFFF_FFFFFFFF_FFFFFFFF, 0x80000000_00000000_00000000_00000000) == true
; run: %icmp_of_i128(0x80000000_00000000_00000000_00000000, 0x7FFFFFFF_FFFFFFFF_FFFFFFFF_FFFFFFFF) == true
; run: %icmp_of_i128(0x4FFFFFFF_FFFFFFFF_FFFFFFFF_FFFFFFFF, 0x30000000_00000000_00000000_00000000) == false
; run: %icmp_of_i128(0x4FFFFFFF_FFFFFFFF_FFFFFFFF_FFFFFFFF, 0x30000000_00000000_00000000_00000001) == true
; run: %icmp_of_i128(0x4FFFFFFF_FFFFFFFF_FFFFFFFF_FFFFFFFF, 0x30000000_00000000_00000000_00000001) == false
function %icmp_nof_i128(i128, i128) -> b1 {
block0(v0: i128, v1: i128):
@@ -30,9 +33,11 @@ block0(v0: i128, v1: i128):
; run: %icmp_nof_i128(-1, -1) == true
; run: %icmp_nof_i128(0x80000000_00000000_00000000_00000000, 0) == true
; run: %icmp_nof_i128(0x7FFFFFFF_FFFFFFFF_FFFFFFFF_FFFFFFFF, 0) == true
; run: %icmp_nof_i128(1, 0x7FFFFFFF_FFFFFFFF_FFFFFFFF_FFFFFFFF) == false
; run: %icmp_nof_i128(0x7FFFFFFF_FFFFFFFF_FFFFFFFF_FFFFFFFF, 1) == false
; run: %icmp_nof_i128(0x7FFFFFFF_FFFFFFFF_FFFFFFFF_FFFFFFFF, 0x80000000_00000000_00000000_00000000) == true
; run: %icmp_nof_i128(0x80000000_00000000_00000000_00000000, 0x7FFFFFFF_FFFFFFFF_FFFFFFFF_FFFFFFFF) == true
; run: %icmp_nof_i128(1, 0x7FFFFFFF_FFFFFFFF_FFFFFFFF_FFFFFFFF) == true
; run: %icmp_nof_i128(0x7FFFFFFF_FFFFFFFF_FFFFFFFF_FFFFFFFF, 1) == true
; run: %icmp_nof_i128(0x80000000_00000000_00000000_00000000, 1) == false
; run: %icmp_nof_i128(1, 0x80000000_00000000_00000000_00000000) == false
; run: %icmp_nof_i128(0x7FFFFFFF_FFFFFFFF_FFFFFFFF_FFFFFFFF, 0x80000000_00000000_00000000_00000000) == false
; run: %icmp_nof_i128(0x80000000_00000000_00000000_00000000, 0x7FFFFFFF_FFFFFFFF_FFFFFFFF_FFFFFFFF) == false
; run: %icmp_nof_i128(0x4FFFFFFF_FFFFFFFF_FFFFFFFF_FFFFFFFF, 0x30000000_00000000_00000000_00000000) == true
; run: %icmp_nof_i128(0x4FFFFFFF_FFFFFFFF_FFFFFFFF_FFFFFFFF, 0x30000000_00000000_00000000_00000001) == false
; run: %icmp_nof_i128(0x4FFFFFFF_FFFFFFFF_FFFFFFFF_FFFFFFFF, 0x30000000_00000000_00000000_00000001) == true