aarch64: Implement lowering of i128 icmp instructions

We use three different approaches depending on the type of comparison requested:
* For eq/ne we compare the high and low halves separately and check
  that both are equal
* For overflow checks, we perform an i128 add and check the
  resulting overflow flag
* For the remaining comparisons (gt/lt/sgt/etc.) we compare both the
  low and high halves: if the high halves are equal, the result is the
  unsigned comparison of the low halves, otherwise it is the comparison
  of the high halves (see the sketch below)
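
As an illustration, a signed greater-than (sgt) would come out roughly as
the sequence below (a sketch, not the exact emitted code; lhs_*, rhs_*,
tmp* and dst stand in for the registers chosen by register allocation):

    cmp  lhs_lo, rhs_lo        // unsigned compare of the low halves
    cset tmp1, hi              // low-half result, always unsigned
    cmp  lhs_hi, rhs_hi        // compare of the high halves
    cset tmp2, gt              // high-half result, signed for sgt
    csel dst, tmp1, tmp2, eq   // high halves equal? take the low result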

As with other i128 ops, we are still missing immlogic support.
Afonso Bordado
2021-06-09 19:29:44 +01:00
parent 4d085d8fbf
commit 2643d2654c
4 changed files with 520 additions and 70 deletions


@@ -1,7 +1,7 @@
//! Lower a single Cranelift instruction into vcode.
use crate::binemit::CodeOffset;
-use crate::ir::condcodes::FloatCC;
use crate::ir::condcodes::{FloatCC, IntCC};
use crate::ir::types::*;
use crate::ir::Inst as IRInst;
use crate::ir::{InstructionData, Opcode, TrapCode};
@@ -1673,14 +1673,112 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
(false, true) => NarrowValueMode::SignExtend64,
(false, false) => NarrowValueMode::ZeroExtend64,
};
-let rn = put_input_in_reg(ctx, inputs[0], narrow_mode);
-if !ty.is_vector() {
if ty == I128 {
let lhs = put_input_in_regs(ctx, inputs[0]);
let rhs = put_input_in_regs(ctx, inputs[1]);
let tmp1 = ctx.alloc_tmp(I64).only_reg().unwrap();
let tmp2 = ctx.alloc_tmp(I64).only_reg().unwrap();
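// Scratch registers for the per-half intermediate results.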
match condcode {
IntCC::Equal | IntCC::NotEqual => {
// eor tmp1, lhs_lo, rhs_lo
// eor tmp2, lhs_hi, rhs_hi
// orr tmp1, tmp1, tmp2
// cmp tmp1, xzr
// cset dst, {eq, ne}
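// Each eor is zero iff the corresponding halves are equal, so
// the or of the two results is zero iff all 128 bits match.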
ctx.emit(Inst::AluRRR {
alu_op: ALUOp::Eor64,
rd: tmp1,
rn: lhs.regs()[0],
rm: rhs.regs()[0],
});
ctx.emit(Inst::AluRRR {
alu_op: ALUOp::Eor64,
rd: tmp2,
rn: lhs.regs()[1],
rm: rhs.regs()[1],
});
// Combine the two xor results with an or rather than an add:
// the sum of two nonzero halves can wrap to zero and would
// spuriously set the Z flag.
ctx.emit(Inst::AluRRR {
alu_op: ALUOp::Orr64,
rd: tmp1,
rn: tmp1.to_reg(),
rm: tmp2.to_reg(),
});
ctx.emit(Inst::AluRRR {
alu_op: ALUOp::SubS64,
rd: writable_zero_reg(),
rn: tmp1.to_reg(),
rm: zero_reg(),
});
materialize_bool_result(ctx, insn, rd, cond);
}
IntCC::Overflow | IntCC::NotOverflow => {
// We can do a 128-bit add while throwing away the results
// and check the overflow flag at the end.
//
// adds xzr, lhs_lo, rhs_lo
// adcs xzr, lhs_hi, rhs_hi
// cset dst, {vs, vc}
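// The carry out of the low-half adds feeds the adcs, so the V flag
// after the adcs reflects signed overflow of the full 128-bit sum.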
ctx.emit(Inst::AluRRR {
alu_op: ALUOp::AddS64,
rd: writable_zero_reg(),
rn: lhs.regs()[0],
rm: rhs.regs()[0],
});
ctx.emit(Inst::AluRRR {
alu_op: ALUOp::AdcS64,
rd: writable_zero_reg(),
rn: lhs.regs()[1],
rm: rhs.regs()[1],
});
materialize_bool_result(ctx, insn, rd, cond);
}
_ => {
// cmp lhs_lo, rhs_lo
// cset tmp1, low_cc
// cmp lhs_hi, rhs_hi
// cset tmp2, cond
// csel dst, tmp1, tmp2, eq
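// The csel reads the flags from the second (high-half) cmp: when
// the high halves are equal, the low-half result in tmp1 is taken.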
let low_cc = match condcode {
IntCC::SignedGreaterThanOrEqual | IntCC::UnsignedGreaterThanOrEqual => {
Cond::Hs
}
IntCC::SignedGreaterThan | IntCC::UnsignedGreaterThan => Cond::Hi,
IntCC::SignedLessThanOrEqual | IntCC::UnsignedLessThanOrEqual => {
Cond::Ls
}
IntCC::SignedLessThan | IntCC::UnsignedLessThan => Cond::Lo,
_ => unreachable!(),
};
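// The low half contributes only magnitude, so it is always compared
// unsigned; signedness is handled by `cond` on the high half.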
ctx.emit(Inst::AluRRR {
alu_op: ALUOp::SubS64,
rd: writable_zero_reg(),
rn: lhs.regs()[0],
rm: rhs.regs()[0],
});
materialize_bool_result(ctx, insn, tmp1, low_cc);
ctx.emit(Inst::AluRRR {
alu_op: ALUOp::SubS64,
rd: writable_zero_reg(),
rn: lhs.regs()[1],
rm: rhs.regs()[1],
});
materialize_bool_result(ctx, insn, tmp2, cond);
ctx.emit(Inst::CSel {
cond: Cond::Eq,
rd,
rn: tmp1.to_reg(),
rm: tmp2.to_reg(),
});
}
}
} else if !ty.is_vector() {
let alu_op = choose_32_64(ty, ALUOp::SubS32, ALUOp::SubS64);
let rn = put_input_in_reg(ctx, inputs[0], narrow_mode);
let rm = put_input_in_rse_imm12(ctx, inputs[1], narrow_mode);
ctx.emit(alu_inst_imm12(alu_op, writable_zero_reg(), rn, rm));
materialize_bool_result(ctx, insn, rd, cond);
} else {
let rn = put_input_in_reg(ctx, inputs[0], narrow_mode);
let rm = put_input_in_reg(ctx, inputs[1], narrow_mode);
lower_vector_compare(ctx, rd, rn, rm, ty, cond)?;
}