[AArch64] Refactor ALUOp3 (#3950)

2022-04-14 20:16:56 +01:00
parent 51d82aebfd
commit e142f587a7
9 changed files with 699 additions and 487 deletions
--- a/cranelift/codegen/src/isa/aarch64/inst.isle
+++ b/cranelift/codegen/src/isa/aarch64/inst.isle
@@ -18,6 +18,7 @@
       ;; An ALU operation with three register sources and a register destination.
       (AluRRRR
        (alu_op ALUOp3)
+        (size OperandSize)
        (rd WritableReg)
        (rn Reg)
        (rm Reg)
@@ -833,13 +834,9 @@
 (type ALUOp3
  (enum
    ;; Multiply-add
-    (MAdd32)
-    ;; Multiply-add
-    (MAdd64)
+    (MAdd)
    ;; Multiply-sub
-    (MSub32)
-    ;; Multiply-sub
-    (MSub64)
+    (MSub)
 ))

 (type UImm5 (primitive UImm5))
@@ -1461,10 +1458,10 @@
        (alu_rrr_extend op ty src1 src2 extend)))

 ;; Helper for emitting `MInst.AluRRRR` instructions.
-(decl alu_rrrr (ALUOp3 Reg Reg Reg) Reg)
-(rule (alu_rrrr op src1 src2 src3)
+(decl alu_rrrr (ALUOp3 Type Reg Reg Reg) Reg)
+(rule (alu_rrrr op ty src1 src2 src3)
      (let ((dst WritableReg (temp_writable_reg $I64))
-            (_ Unit (emit (MInst.AluRRRR op dst src1 src2 src3))))
+            (_ Unit (emit (MInst.AluRRRR op (operand_size ty) dst src1 src2 src3))))
        dst))

 ;; Helper for emitting `MInst.BitRR` instructions.
@@ -1656,19 +1653,12 @@
 ;; Helpers for generating `madd` instructions.

 (decl madd (Type Reg Reg Reg) Reg)
-(rule (madd (fits_in_32 _ty) x y z) (madd32 x y z))
-(rule (madd $I64 x y z) (madd64 x y z))
-
-(decl madd32 (Reg Reg Reg) Reg)
-(rule (madd32 x y z) (alu_rrrr (ALUOp3.MAdd32) x y z))
-
-(decl madd64 (Reg Reg Reg) Reg)
-(rule (madd64 x y z) (alu_rrrr (ALUOp3.MAdd64) x y z))
+(rule (madd ty x y z) (alu_rrrr (ALUOp3.MAdd) ty x y z))

 ;; Helpers for generating `msub` instructions.

-(decl msub64 (Reg Reg Reg) Reg)
-(rule (msub64 x y z) (alu_rrrr (ALUOp3.MSub64) x y z))
+(decl msub (Type Reg Reg Reg) Reg)
+(rule (msub ty x y z) (alu_rrrr (ALUOp3.MSub) ty x y z))

 ;; Helper for generating `uqadd` instructions.
 (decl uqadd (Reg Reg VectorSize) Reg)
--- a/cranelift/codegen/src/isa/aarch64/inst/emit.rs
+++ b/cranelift/codegen/src/isa/aarch64/inst/emit.rs
@@ -758,6 +758,7 @@ impl MachInstEmit for Inst {
            }
            &Inst::AluRRRR {
                alu_op,
+                size,
                rd,
                rm,
                rn,
@@ -769,11 +770,10 @@ impl MachInstEmit for Inst {
                let ra = allocs.next(ra);

                let (top11, bit15) = match alu_op {
-                    ALUOp3::MAdd32 => (0b0_00_11011_000, 0),
-                    ALUOp3::MSub32 => (0b0_00_11011_000, 1),
-                    ALUOp3::MAdd64 => (0b1_00_11011_000, 0),
-                    ALUOp3::MSub64 => (0b1_00_11011_000, 1),
+                    ALUOp3::MAdd => (0b0_00_11011_000, 0),
+                    ALUOp3::MSub => (0b0_00_11011_000, 1),
                };
+                let top11 = top11 | size.sf_bit() << 10;
                sink.put4(enc_arith_rrrr(top11, rm, bit15, ra, rn, rd));
            }
            &Inst::AluRRImm12 {
--- a/cranelift/codegen/src/isa/aarch64/inst/emit_tests.rs
+++ b/cranelift/codegen/src/isa/aarch64/inst/emit_tests.rs
@@ -995,7 +995,8 @@ fn test_aarch64_binemit() {

    insns.push((
        Inst::AluRRRR {
-            alu_op: ALUOp3::MAdd32,
+            alu_op: ALUOp3::MAdd,
+            size: OperandSize::Size32,
            rd: writable_xreg(1),
            rn: xreg(2),
            rm: xreg(3),
@@ -1006,7 +1007,8 @@ fn test_aarch64_binemit() {
    ));
    insns.push((
        Inst::AluRRRR {
-            alu_op: ALUOp3::MAdd64,
+            alu_op: ALUOp3::MAdd,
+            size: OperandSize::Size64,
            rd: writable_xreg(1),
            rn: xreg(2),
            rm: xreg(3),
@@ -1017,7 +1019,8 @@ fn test_aarch64_binemit() {
    ));
    insns.push((
        Inst::AluRRRR {
-            alu_op: ALUOp3::MSub32,
+            alu_op: ALUOp3::MSub,
+            size: OperandSize::Size32,
            rd: writable_xreg(1),
            rn: xreg(2),
            rm: xreg(3),
@@ -1028,7 +1031,8 @@ fn test_aarch64_binemit() {
    ));
    insns.push((
        Inst::AluRRRR {
-            alu_op: ALUOp3::MSub64,
+            alu_op: ALUOp3::MSub,
+            size: OperandSize::Size64,
            rd: writable_xreg(1),
            rn: xreg(2),
            rm: xreg(3),
--- a/cranelift/codegen/src/isa/aarch64/inst/mod.rs
+++ b/cranelift/codegen/src/isa/aarch64/inst/mod.rs
@@ -1288,16 +1288,15 @@ impl Inst {
            }
            &Inst::AluRRRR {
                alu_op,
+                size,
                rd,
                rn,
                rm,
                ra,
            } => {
-                let (op, size) = match alu_op {
-                    ALUOp3::MAdd32 => ("madd", OperandSize::Size32),
-                    ALUOp3::MAdd64 => ("madd", OperandSize::Size64),
-                    ALUOp3::MSub32 => ("msub", OperandSize::Size32),
-                    ALUOp3::MSub64 => ("msub", OperandSize::Size64),
+                let op = match alu_op {
+                    ALUOp3::MAdd => "madd",
+                    ALUOp3::MSub => "msub",
                };
                let rd = pretty_print_ireg(rd.to_reg(), size, allocs);
                let rn = pretty_print_ireg(rn, size, allocs);
--- a/cranelift/codegen/src/isa/aarch64/lower.isle
+++ b/cranelift/codegen/src/isa/aarch64/lower.isle
@@ -70,6 +70,10 @@
 (rule (lower (has_type (fits_in_64 ty) (iadd (imul x y) z)))
      (madd ty x y z))

+;; Fold an `isub` and `imul` combination into a `msub` instruction.
+(rule (lower (has_type (fits_in_64 ty) (isub x (imul y z))))
+      (msub ty y z x))
+
 ;; vectors

 (rule (lower (has_type ty @ (multi_lane _ _) (iadd x y)))
@@ -202,9 +206,9 @@
           ;; madd    dst_hi, x_hi, y_lo, dst_hi
           ;; madd    dst_lo, x_lo, y_lo, zero
           (dst_hi1 Reg (umulh $I64 x_lo y_lo))
-           (dst_hi2 Reg (madd64 x_lo y_hi dst_hi1))
-           (dst_hi Reg (madd64 x_hi y_lo dst_hi2))
-           (dst_lo Reg (madd64 x_lo y_lo (zero_reg))))
+           (dst_hi2 Reg (madd $I64 x_lo y_hi dst_hi1))
+           (dst_hi Reg (madd $I64 x_hi y_lo dst_hi2))
+           (dst_lo Reg (madd $I64 x_lo y_lo (zero_reg))))
        (value_regs dst_lo dst_hi)))

 ;; Case for i8x16, i16x8, and i32x4.
@@ -358,7 +362,7 @@
 (rule (lower (has_type (fits_in_32 ty) (smulhi x y)))
      (let ((x64 Reg (put_in_reg_sext64 x))
            (y64 Reg (put_in_reg_sext64 y))
-            (mul Reg (madd64 x64 y64 (zero_reg)))
+            (mul Reg (madd $I64 x64 y64 (zero_reg)))
            (result Reg (asr_imm $I64 mul (imm_shift_from_u8 (ty_bits ty)))))
        result))

@@ -368,11 +372,13 @@
      (umulh $I64 x y))

 (rule (lower (has_type (fits_in_32 ty) (umulhi x y)))
-      (let ((x64 Reg (put_in_reg_zext64 x))
-            (y64 Reg (put_in_reg_zext64 y))
-            (mul Reg (madd64 x64 y64 (zero_reg)))
-            (result Reg (lsr_imm $I64 mul (imm_shift_from_u8 (ty_bits ty)))))
-        result))
+      (let (
+          (x64 Reg (put_in_reg_zext64 x))
+          (y64 Reg (put_in_reg_zext64 y))
+          (mul Reg (madd $I64 x64 y64 (zero_reg)))
+          (result Reg (lsr_imm $I64 mul (imm_shift_from_u8 (ty_bits ty))))
+        )
+        (value_reg result)))

 ;;;; Rules for `udiv` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

@@ -459,14 +465,14 @@
      (let ((x64 Reg (put_in_reg_zext64 x))
            (y64 Reg (put_nonzero_in_reg_zext64 y))
            (div Reg (a64_udiv $I64 x64 y64))
-            (result Reg (msub64 div y64 x64)))
+            (result Reg (msub $I64 div y64 x64)))
        result))

 (rule (lower (has_type (fits_in_64 ty) (srem x y)))
      (let ((x64 Reg (put_in_reg_sext64 x))
            (y64 Reg (put_nonzero_in_reg_sext64 y))
            (div Reg (a64_sdiv $I64 x64 y64))
-            (result Reg (msub64 div y64 x64)))
+            (result Reg (msub $I64 div y64 x64)))
        result))

 ;;;; Rules for `uextend` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
@@ -1014,7 +1020,7 @@
      (let ((hi_clz Reg (a64_clz $I64 (value_regs_get val 1)))
            (lo_clz Reg (a64_clz $I64 (value_regs_get val 0)))
            (tmp Reg (lsr_imm $I64 hi_clz (imm_shift_from_u8 6))))
-        (value_regs (madd64 lo_clz tmp hi_clz) (imm $I64 0))))
+        (value_regs (madd $I64 lo_clz tmp hi_clz) (imm $I64 0))))

 ;;;; Rules for `ctz` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

@@ -1062,7 +1068,7 @@
            (hi_cls Reg (a64_cls $I64 hi))
            (sign_eq_eon Reg (eon $I64 hi lo))
            (sign_eq Reg (lsr_imm $I64 sign_eq_eon (imm_shift_from_u8 63)))
-            (lo_sign_bits Reg (madd64 lo_cls sign_eq sign_eq))
+            (lo_sign_bits Reg (madd $I64 lo_cls sign_eq sign_eq))
            (maybe_lo Reg (with_flags_reg
                           (cmp64_imm hi_cls (u8_into_imm12 63))
                           (csel (Cond.Eq) lo_sign_bits (zero_reg)))))
--- a/cranelift/codegen/src/isa/aarch64/lower/isle/generated_code.manifest
+++ b/cranelift/codegen/src/isa/aarch64/lower/isle/generated_code.manifest
@@ -1,4 +1,4 @@
 src/clif.isle 443b34b797fc8ace
 src/prelude.isle afd037c4d91c875c
-src/isa/aarch64/inst.isle 544b7126192140d5
-src/isa/aarch64/lower.isle d88b62dd6b40622
+src/isa/aarch64/inst.isle a44074e06f955750
+src/isa/aarch64/lower.isle 71c7e603b0e4bdef
--- a/cranelift/codegen/src/isa/aarch64/lower/isle/generated_code.rs
+++ b/cranelift/codegen/src/isa/aarch64/lower/isle/generated_code.rs