Merge pull request #2975 from afonso360/aarch64-icmp

aarch64: Implement lowering i128 icmp instructions
2021-06-09 15:38:41 -07:00
parent caa85c2fa5 2643d2654c
commit 3d56728b86
7 changed files with 574 additions and 70 deletions
--- a/cranelift/codegen/src/isa/aarch64/inst/emit.rs
+++ b/cranelift/codegen/src/isa/aarch64/inst/emit.rs
@@ -599,10 +599,14 @@ impl MachInstEmit for Inst {
                    ALUOp::Add64 => 0b10001011_000,
                    ALUOp::Adc32 => 0b00011010_000,
                    ALUOp::Adc64 => 0b10011010_000,
+                    ALUOp::AdcS32 => 0b00111010_000,
+                    ALUOp::AdcS64 => 0b10111010_000,
                    ALUOp::Sub32 => 0b01001011_000,
                    ALUOp::Sub64 => 0b11001011_000,
                    ALUOp::Sbc32 => 0b01011010_000,
                    ALUOp::Sbc64 => 0b11011010_000,
+                    ALUOp::SbcS32 => 0b01111010_000,
+                    ALUOp::SbcS64 => 0b11111010_000,
                    ALUOp::Orr32 => 0b00101010_000,
                    ALUOp::Orr64 => 0b10101010_000,
                    ALUOp::And32 => 0b00001010_000,
--- a/cranelift/codegen/src/isa/aarch64/inst/emit_tests.rs
+++ b/cranelift/codegen/src/isa/aarch64/inst/emit_tests.rs
@@ -70,6 +70,26 @@ fn test_aarch64_binemit() {
        "A400069A",
        "adc x4, x5, x6",
    ));
+    insns.push((
+        Inst::AluRRR {
+            alu_op: ALUOp::AdcS32,
+            rd: writable_xreg(1),
+            rn: xreg(2),
+            rm: xreg(3),
+        },
+        "4100033A",
+        "adcs w1, w2, w3",
+    ));
+    insns.push((
+        Inst::AluRRR {
+            alu_op: ALUOp::AdcS64,
+            rd: writable_xreg(4),
+            rn: xreg(5),
+            rm: xreg(6),
+        },
+        "A40006BA",
+        "adcs x4, x5, x6",
+    ));
    insns.push((
        Inst::AluRRR {
            alu_op: ALUOp::Sub32,
@@ -110,6 +130,26 @@ fn test_aarch64_binemit() {
        "A40006DA",
        "sbc x4, x5, x6",
    ));
+    insns.push((
+        Inst::AluRRR {
+            alu_op: ALUOp::SbcS32,
+            rd: writable_xreg(1),
+            rn: xreg(2),
+            rm: xreg(3),
+        },
+        "4100037A",
+        "sbcs w1, w2, w3",
+    ));
+    insns.push((
+        Inst::AluRRR {
+            alu_op: ALUOp::SbcS64,
+            rd: writable_xreg(4),
+            rn: xreg(5),
+            rm: xreg(6),
+        },
+        "A40006FA",
+        "sbcs x4, x5, x6",
+    ));

    insns.push((
        Inst::AluRRR {
--- a/cranelift/codegen/src/isa/aarch64/inst/mod.rs
+++ b/cranelift/codegen/src/isa/aarch64/inst/mod.rs
@@ -89,9 +89,15 @@ pub enum ALUOp {
    /// Add with carry
    Adc32,
    Adc64,
+    /// Add with carry, setting flags
+    AdcS32,
+    AdcS64,
    /// Subtract with carry
    Sbc32,
    Sbc64,
+    /// Subtract with carry, setting flags
+    SbcS32,
+    SbcS64,
 }

 /// An ALU operation with three arguments.
@@ -3216,8 +3222,12 @@ impl Inst {
                ALUOp::Lsl64 => ("lsl", OperandSize::Size64),
                ALUOp::Adc32 => ("adc", OperandSize::Size32),
                ALUOp::Adc64 => ("adc", OperandSize::Size64),
+                ALUOp::AdcS32 => ("adcs", OperandSize::Size32),
+                ALUOp::AdcS64 => ("adcs", OperandSize::Size64),
                ALUOp::Sbc32 => ("sbc", OperandSize::Size32),
                ALUOp::Sbc64 => ("sbc", OperandSize::Size64),
+                ALUOp::SbcS32 => ("sbcs", OperandSize::Size32),
+                ALUOp::SbcS64 => ("sbcs", OperandSize::Size64),
            }
        }

--- a/cranelift/codegen/src/isa/aarch64/lower_inst.rs
+++ b/cranelift/codegen/src/isa/aarch64/lower_inst.rs
@@ -1,7 +1,7 @@
 //! Lower a single Cranelift instruction into vcode.

 use crate::binemit::CodeOffset;
-use crate::ir::condcodes::FloatCC;
+use crate::ir::condcodes::{FloatCC, IntCC};
 use crate::ir::types::*;
 use crate::ir::Inst as IRInst;
 use crate::ir::{InstructionData, Opcode, TrapCode};
@@ -1735,14 +1735,112 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
                (false, true) => NarrowValueMode::SignExtend64,
                (false, false) => NarrowValueMode::ZeroExtend64,
            };
-            let rn = put_input_in_reg(ctx, inputs[0], narrow_mode);

-            if !ty.is_vector() {
+            if ty == I128 {
+                let lhs = put_input_in_regs(ctx, inputs[0]);
+                let rhs = put_input_in_regs(ctx, inputs[1]);
+
+                let tmp1 = ctx.alloc_tmp(I64).only_reg().unwrap();
+                let tmp2 = ctx.alloc_tmp(I64).only_reg().unwrap();
+
+                match condcode {
+                    IntCC::Equal | IntCC::NotEqual => {
+                        // eor     tmp1, lhs_lo, rhs_lo
+                        // eor     tmp2, lhs_hi, rhs_hi
+                        // adds    xzr, tmp1, tmp2
+                        // cset    dst, {eq, ne}
+
+                        ctx.emit(Inst::AluRRR {
+                            alu_op: ALUOp::Eor64,
+                            rd: tmp1,
+                            rn: lhs.regs()[0],
+                            rm: rhs.regs()[0],
+                        });
+                        ctx.emit(Inst::AluRRR {
+                            alu_op: ALUOp::Eor64,
+                            rd: tmp2,
+                            rn: lhs.regs()[1],
+                            rm: rhs.regs()[1],
+                        });
+                        ctx.emit(Inst::AluRRR {
+                            alu_op: ALUOp::AddS64,
+                            rd: writable_zero_reg(),
+                            rn: tmp1.to_reg(),
+                            rm: tmp2.to_reg(),
+                        });
+                        materialize_bool_result(ctx, insn, rd, cond);
+                    }
+                    IntCC::Overflow | IntCC::NotOverflow => {
+                        // We can do a 128-bit add while throwing away the results
+                        // and check the overflow flag at the end.
+                        //
+                        // adds    xzr, lhs_lo, rhs_lo
+                        // adcs    xzr, lhs_hi, rhs_hi
+                        // cset    dst, {vs, vc}
+
+                        ctx.emit(Inst::AluRRR {
+                            alu_op: ALUOp::AddS64,
+                            rd: writable_zero_reg(),
+                            rn: lhs.regs()[0],
+                            rm: rhs.regs()[0],
+                        });
+                        ctx.emit(Inst::AluRRR {
+                            alu_op: ALUOp::AdcS64,
+                            rd: writable_zero_reg(),
+                            rn: lhs.regs()[1],
+                            rm: rhs.regs()[1],
+                        });
+                        materialize_bool_result(ctx, insn, rd, cond);
+                    }
+                    _ => {
+                        // cmp     lhs_lo, rhs_lo
+                        // cset    tmp1, low_cc
+                        // cmp     lhs_hi, rhs_hi
+                        // cset    tmp2, cond
+                        // csel    dst, tmp1, tmp2, eq
+
+                        let low_cc = match condcode {
+                            IntCC::SignedGreaterThanOrEqual | IntCC::UnsignedGreaterThanOrEqual => {
+                                Cond::Hs
+                            }
+                            IntCC::SignedGreaterThan | IntCC::UnsignedGreaterThan => Cond::Hi,
+                            IntCC::SignedLessThanOrEqual | IntCC::UnsignedLessThanOrEqual => {
+                                Cond::Ls
+                            }
+                            IntCC::SignedLessThan | IntCC::UnsignedLessThan => Cond::Lo,
+                            _ => unreachable!(),
+                        };
+
+                        ctx.emit(Inst::AluRRR {
+                            alu_op: ALUOp::SubS64,
+                            rd: writable_zero_reg(),
+                            rn: lhs.regs()[0],
+                            rm: rhs.regs()[0],
+                        });
+                        materialize_bool_result(ctx, insn, tmp1, low_cc);
+                        ctx.emit(Inst::AluRRR {
+                            alu_op: ALUOp::SubS64,
+                            rd: writable_zero_reg(),
+                            rn: lhs.regs()[1],
+                            rm: rhs.regs()[1],
+                        });
+                        materialize_bool_result(ctx, insn, tmp2, cond);
+                        ctx.emit(Inst::CSel {
+                            cond: Cond::Eq,
+                            rd,
+                            rn: tmp1.to_reg(),
+                            rm: tmp2.to_reg(),
+                        });
+                    }
+                }
+            } else if !ty.is_vector() {
                let alu_op = choose_32_64(ty, ALUOp::SubS32, ALUOp::SubS64);
+                let rn = put_input_in_reg(ctx, inputs[0], narrow_mode);
                let rm = put_input_in_rse_imm12(ctx, inputs[1], narrow_mode);
                ctx.emit(alu_inst_imm12(alu_op, writable_zero_reg(), rn, rm));
                materialize_bool_result(ctx, insn, rd, cond);
            } else {
+                let rn = put_input_in_reg(ctx, inputs[0], narrow_mode);
                let rm = put_input_in_reg(ctx, inputs[1], narrow_mode);
                lower_vector_compare(ctx, rd, rn, rm, ty, cond)?;
            }