Merge pull request #2975 from afonso360/aarch64-icmp

aarch64: Implement lowering i128 icmp instructions
Chris Fallin committed (via GitHub) on 2021-06-09 15:38:41 -07:00
7 changed files with 574 additions and 70 deletions
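
The new I128 path keeps each operand in a pair of 64-bit registers (regs()[0] is the low half, regs()[1] the high half, per the lhs_lo/lhs_hi comments in the diff) and decomposes the comparison half-wise. A rough Rust model of the general-comparison arm (cmp/cset twice, then csel), illustrative only and not code from the commit:

    // Model of the half-wise scheme: compare the low halves and the high
    // halves separately, then select between the two results.
    fn u128_lt(lhs: u128, rhs: u128) -> bool {
        let (l_lo, l_hi) = (lhs as u64, (lhs >> 64) as u64);
        let (r_lo, r_hi) = (rhs as u64, (rhs >> 64) as u64);
        let tmp1 = l_lo < r_lo; // cmp lhs_lo, rhs_lo; cset tmp1, lo
        let tmp2 = l_hi < r_hi; // cmp lhs_hi, rhs_hi; cset tmp2, lo
        // csel dst, tmp1, tmp2, eq: the low half decides only when the
        // high halves tie; otherwise the high-half comparison decides.
        if l_hi == r_hi {
            tmp1
        } else {
            tmp2
        }
    }

    fn main() {
        let a = (1u128 << 64) + 5; // hi = 1, lo = 5
        let b = (1u128 << 64) + 9; // hi = 1, lo = 9
        assert_eq!(u128_lt(a, b), a < b);
        assert!(u128_lt(5, u128::MAX));
    }

The Equal/NotEqual and Overflow/NotOverflow arms in the diff avoid the select entirely by folding both halves into the flags.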

@@ -1,7 +1,7 @@
 //! Lower a single Cranelift instruction into vcode.
 use crate::binemit::CodeOffset;
-use crate::ir::condcodes::FloatCC;
+use crate::ir::condcodes::{FloatCC, IntCC};
 use crate::ir::types::*;
 use crate::ir::Inst as IRInst;
 use crate::ir::{InstructionData, Opcode, TrapCode};
@@ -1735,14 +1735,112 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
                 (false, true) => NarrowValueMode::SignExtend64,
                 (false, false) => NarrowValueMode::ZeroExtend64,
             };
-            let rn = put_input_in_reg(ctx, inputs[0], narrow_mode);
-            if !ty.is_vector() {
+            if ty == I128 {
+                let lhs = put_input_in_regs(ctx, inputs[0]);
+                let rhs = put_input_in_regs(ctx, inputs[1]);
+                let tmp1 = ctx.alloc_tmp(I64).only_reg().unwrap();
+                let tmp2 = ctx.alloc_tmp(I64).only_reg().unwrap();
+                match condcode {
+                    IntCC::Equal | IntCC::NotEqual => {
+                        // eor tmp1, lhs_lo, rhs_lo
+                        // eor tmp2, lhs_hi, rhs_hi
+                        // adds xzr, tmp1, tmp2
+                        // cset dst, {eq, ne}
+                        ctx.emit(Inst::AluRRR {
+                            alu_op: ALUOp::Eor64,
+                            rd: tmp1,
+                            rn: lhs.regs()[0],
+                            rm: rhs.regs()[0],
+                        });
+                        ctx.emit(Inst::AluRRR {
+                            alu_op: ALUOp::Eor64,
+                            rd: tmp2,
+                            rn: lhs.regs()[1],
+                            rm: rhs.regs()[1],
+                        });
+                        ctx.emit(Inst::AluRRR {
+                            alu_op: ALUOp::AddS64,
+                            rd: writable_zero_reg(),
+                            rn: tmp1.to_reg(),
+                            rm: tmp2.to_reg(),
+                        });
+                        materialize_bool_result(ctx, insn, rd, cond);
+                    }
+                    IntCC::Overflow | IntCC::NotOverflow => {
+                        // We can do a 128-bit add, throw away the result,
+                        // and check the overflow flags at the end.
+                        //
+                        // adds xzr, lhs_lo, rhs_lo
+                        // adcs xzr, lhs_hi, rhs_hi
+                        // cset dst, {vs, vc}
+                        ctx.emit(Inst::AluRRR {
+                            alu_op: ALUOp::AddS64,
+                            rd: writable_zero_reg(),
+                            rn: lhs.regs()[0],
+                            rm: rhs.regs()[0],
+                        });
+                        ctx.emit(Inst::AluRRR {
+                            alu_op: ALUOp::AdcS64,
+                            rd: writable_zero_reg(),
+                            rn: lhs.regs()[1],
+                            rm: rhs.regs()[1],
+                        });
+                        materialize_bool_result(ctx, insn, rd, cond);
+                    }
+                    _ => {
+                        // cmp lhs_lo, rhs_lo
+                        // cset tmp1, low_cc
+                        // cmp lhs_hi, rhs_hi
+                        // cset tmp2, cond
+                        // csel dst, tmp1, tmp2, eq
+                        let low_cc = match condcode {
+                            IntCC::SignedGreaterThanOrEqual | IntCC::UnsignedGreaterThanOrEqual => {
+                                Cond::Hs
+                            }
+                            IntCC::SignedGreaterThan | IntCC::UnsignedGreaterThan => Cond::Hi,
+                            IntCC::SignedLessThanOrEqual | IntCC::UnsignedLessThanOrEqual => {
+                                Cond::Ls
+                            }
+                            IntCC::SignedLessThan | IntCC::UnsignedLessThan => Cond::Lo,
+                            _ => unreachable!(),
+                        };
+                        ctx.emit(Inst::AluRRR {
+                            alu_op: ALUOp::SubS64,
+                            rd: writable_zero_reg(),
+                            rn: lhs.regs()[0],
+                            rm: rhs.regs()[0],
+                        });
+                        materialize_bool_result(ctx, insn, tmp1, low_cc);
+                        ctx.emit(Inst::AluRRR {
+                            alu_op: ALUOp::SubS64,
+                            rd: writable_zero_reg(),
+                            rn: lhs.regs()[1],
+                            rm: rhs.regs()[1],
+                        });
+                        materialize_bool_result(ctx, insn, tmp2, cond);
+                        ctx.emit(Inst::CSel {
+                            cond: Cond::Eq,
+                            rd,
+                            rn: tmp1.to_reg(),
+                            rm: tmp2.to_reg(),
+                        });
+                    }
+                }
+            } else if !ty.is_vector() {
                 let alu_op = choose_32_64(ty, ALUOp::SubS32, ALUOp::SubS64);
+                let rn = put_input_in_reg(ctx, inputs[0], narrow_mode);
                 let rm = put_input_in_rse_imm12(ctx, inputs[1], narrow_mode);
                 ctx.emit(alu_inst_imm12(alu_op, writable_zero_reg(), rn, rm));
                 materialize_bool_result(ctx, insn, rd, cond);
             } else {
+                let rn = put_input_in_reg(ctx, inputs[0], narrow_mode);
                 let rm = put_input_in_reg(ctx, inputs[1], narrow_mode);
                 lower_vector_compare(ctx, rd, rn, rm, ty, cond)?;
             }
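
The Overflow/NotOverflow arm is worth spelling out: both adds and adcs write to xzr, so they exist purely for their flag effects. adds produces the carry out of the low halves, adcs folds that carry into the high-half addition, and the V flag afterwards is the signed-overflow flag of the whole 128-bit sum. A sketch of that flag computation in Rust, illustrative only and not taken from the commit:

    // Model of the adds/adcs overflow check for a 128-bit signed addition.
    fn i128_add_overflows(lhs: i128, rhs: i128) -> bool {
        let (l_lo, l_hi) = (lhs as u64, (lhs >> 64) as i64);
        let (r_lo, r_hi) = (rhs as u64, (rhs >> 64) as i64);
        // adds xzr, lhs_lo, rhs_lo: record the unsigned carry out of the
        // low halves.
        let (_, carry) = l_lo.overflowing_add(r_lo);
        // adcs xzr, lhs_hi, rhs_hi: hi + hi + carry; V reports signed
        // overflow of this three-way sum.
        let (partial, o1) = l_hi.overflowing_add(r_hi);
        let (_, o2) = partial.overflowing_add(carry as i64);
        // The three-way sum overflows i64 exactly when one, and only one,
        // of the two partial additions overflows.
        o1 ^ o2
    }

    fn main() {
        // Cross-check the half-wise flag rule against Rust's checked add.
        for &(a, b) in &[(i128::MAX, 1), (-1, i128::MIN), (7, 9)] {
            assert_eq!(i128_add_overflows(a, b), a.checked_add(b).is_none());
        }
    }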