AArch64: Add various missing SIMD bits

2020-09-07 13:01:47 +01:00
parent 074a0afa83
commit f612e8e7b2
7 changed files with 59 additions and 62 deletions
--- a/cranelift/codegen/src/isa/aarch64/abi.rs
+++ b/cranelift/codegen/src/isa/aarch64/abi.rs
@@ -311,11 +311,12 @@ impl ABIMachineSpec for AArch64MachineDeps {

    fn gen_stack_lower_bound_trap(limit_reg: Reg) -> SmallVec<[Inst; 2]> {
        let mut insts = SmallVec::new();
-        insts.push(Inst::AluRRR {
-            alu_op: ALUOp::SubS64XR,
+        insts.push(Inst::AluRRRExtend {
+            alu_op: ALUOp::SubS64,
            rd: writable_zero_reg(),
            rn: stack_reg(),
            rm: limit_reg,
+            extendop: ExtendOp::UXTX,
        });
        insts.push(Inst::TrapIf {
            trap_info: (ir::SourceLoc::default(), ir::TrapCode::StackOverflow),
@@ -373,10 +374,7 @@ impl ABIMachineSpec for AArch64MachineDeps {
            ret.push(adj_inst);
        } else {
            let tmp = writable_spilltmp_reg();
-            let const_inst = Inst::LoadConst64 {
-                rd: tmp,
-                const_data: amount,
-            };
+            let const_inst = Inst::load_constant(tmp, amount);
            let adj_inst = Inst::AluRRRExtend {
                alu_op,
                rd: writable_stack_reg(),
@@ -384,7 +382,7 @@ impl ABIMachineSpec for AArch64MachineDeps {
                rm: tmp.to_reg(),
                extendop: ExtendOp::UXTX,
            };
-            ret.push(const_inst);
+            ret.extend(const_inst);
            ret.push(adj_inst);
        }
        ret
--- a/cranelift/codegen/src/isa/aarch64/inst/args.rs
+++ b/cranelift/codegen/src/isa/aarch64/inst/args.rs
@@ -575,7 +575,7 @@ impl ScalarSize {
            32 => ScalarSize::Size32,
            64 => ScalarSize::Size64,
            128 => ScalarSize::Size128,
-            _ => panic!("Unexpected type width"),
+            w => panic!("Unexpected type width: {}", w),
        }
    }

@@ -591,7 +591,7 @@ impl ScalarSize {
            ScalarSize::Size16 => 0b11,
            ScalarSize::Size32 => 0b00,
            ScalarSize::Size64 => 0b01,
-            _ => panic!("Unexpected scalar FP operand size"),
+            _ => panic!("Unexpected scalar FP operand size: {:?}", self),
        }
    }
 }
@@ -612,6 +612,7 @@ impl VectorSize {
    /// Convert from a type into a vector operand size.
    pub fn from_ty(ty: Type) -> VectorSize {
        match ty {
+            B32X4 => VectorSize::Size32x4,
            F32X2 => VectorSize::Size32x2,
            F32X4 => VectorSize::Size32x4,
            F64X2 => VectorSize::Size64x2,
@@ -622,7 +623,7 @@ impl VectorSize {
            I32X2 => VectorSize::Size32x2,
            I32X4 => VectorSize::Size32x4,
            I64X2 => VectorSize::Size64x2,
-            _ => unimplemented!(),
+            _ => unimplemented!("Unsupported type: {}", ty),
        }
    }

--- a/cranelift/codegen/src/isa/aarch64/inst/emit.rs
+++ b/cranelift/codegen/src/isa/aarch64/inst/emit.rs
@@ -497,7 +497,6 @@ impl MachInstEmit for Inst {
                    ALUOp::AddS64 => 0b10101011_000,
                    ALUOp::SubS32 => 0b01101011_000,
                    ALUOp::SubS64 => 0b11101011_000,
-                    ALUOp::SubS64XR => 0b11101011_001,
                    ALUOp::SDiv64 => 0b10011010_110,
                    ALUOp::UDiv64 => 0b10011010_110,
                    ALUOp::RotR32 | ALUOp::Lsr32 | ALUOp::Asr32 | ALUOp::Lsl32 => 0b00011010_110,
@@ -512,17 +511,13 @@ impl MachInstEmit for Inst {
                    ALUOp::Lsr32 | ALUOp::Lsr64 => 0b001001,
                    ALUOp::Asr32 | ALUOp::Asr64 => 0b001010,
                    ALUOp::Lsl32 | ALUOp::Lsl64 => 0b001000,
-                    ALUOp::SubS64XR => 0b011000,
                    ALUOp::SMulH | ALUOp::UMulH => 0b011111,
                    _ => 0b000000,
                };
                debug_assert_ne!(writable_stack_reg(), rd);
-                // The stack pointer is the zero register if this instruction
-                // doesn't have access to extended registers, so this might be
-                // an indication that something is wrong.
-                if alu_op != ALUOp::SubS64XR {
+                // In this instruction form the stack pointer's register number denotes the zero
+                // register, so a stack pointer operand here likely indicates that something is wrong.
                debug_assert_ne!(stack_reg(), rn);
-                }
                debug_assert_ne!(stack_reg(), rm);
                sink.put4(enc_arith_rrr(top11, bit15_10, rd, rn, rm));
            }
@@ -2079,19 +2074,6 @@ impl MachInstEmit for Inst {
                // disable the worst-case-size check in this case.
                start_off = sink.cur_offset();
            }
-            &Inst::LoadConst64 { rd, const_data } => {
-                let inst = Inst::ULoad64 {
-                    rd,
-                    mem: AMode::Label(MemLabel::PCRel(8)),
-                    srcloc: None, // can't cause a user trap.
-                };
-                inst.emit(sink, flags, state);
-                let inst = Inst::Jump {
-                    dest: BranchTarget::ResolvedOffset(12),
-                };
-                inst.emit(sink, flags, state);
-                sink.put8(const_data);
-            }
            &Inst::LoadExtName {
                rd,
                ref name,
--- a/cranelift/codegen/src/isa/aarch64/inst/emit_tests.rs
+++ b/cranelift/codegen/src/isa/aarch64/inst/emit_tests.rs
@@ -777,14 +777,15 @@ fn test_aarch64_binemit() {
    ));

    insns.push((
-        Inst::AluRRR {
-            alu_op: ALUOp::SubS64XR,
+        Inst::AluRRRExtend {
+            alu_op: ALUOp::SubS64,
            rd: writable_zero_reg(),
            rn: stack_reg(),
            rm: xreg(12),
+            extendop: ExtendOp::UXTX,
        },
        "FF632CEB",
-        "subs xzr, sp, x12",
+        "subs xzr, sp, x12, UXTX",
    ));

    insns.push((
--- a/cranelift/codegen/src/isa/aarch64/inst/mod.rs
+++ b/cranelift/codegen/src/isa/aarch64/inst/mod.rs
@@ -45,15 +45,11 @@ pub enum ALUOp {
    Sub64,
    Orr32,
    Orr64,
-    /// NOR
    OrrNot32,
-    /// NOR
    OrrNot64,
    And32,
    And64,
-    /// NAND
    AndNot32,
-    /// NAND
    AndNot64,
    /// XOR (AArch64 calls this "EOR")
    Eor32,
@@ -71,8 +67,6 @@ pub enum ALUOp {
    SubS32,
    /// Sub, setting flags
    SubS64,
-    /// Sub, setting flags, using extended registers
-    SubS64XR,
    /// Signed multiply, high-word result
    SMulH,
    /// Unsigned multiply, high-word result
@@ -1078,12 +1072,6 @@ pub enum Inst {
        rtmp2: Writable<Reg>,
    },

-    /// Load an inline constant.
-    LoadConst64 {
-        rd: Writable<Reg>,
-        const_data: u64,
-    },
-
    /// Load an inline symbol reference.
    LoadExtName {
        rd: Writable<Reg>,
@@ -1309,7 +1297,22 @@ impl Inst {
                mem,
                srcloc: None,
            },
-            _ => unimplemented!("gen_load({})", ty),
+            _ => {
+                if ty.is_vector() {
+                    let bits = ty_bits(ty);
+                    let rd = into_reg;
+                    let srcloc = None;
+
+                    if bits == 128 {
+                        Inst::FpuLoad128 { rd, mem, srcloc }
+                    } else {
+                        assert_eq!(bits, 64);
+                        Inst::FpuLoad64 { rd, mem, srcloc }
+                    }
+                } else {
+                    unimplemented!("gen_load({})", ty);
+                }
+            }
        }
    }

@@ -1346,7 +1349,22 @@ impl Inst {
                mem,
                srcloc: None,
            },
-            _ => unimplemented!("gen_store({})", ty),
+            _ => {
+                if ty.is_vector() {
+                    let bits = ty_bits(ty);
+                    let rd = from_reg;
+                    let srcloc = None;
+
+                    if bits == 128 {
+                        Inst::FpuStore128 { rd, mem, srcloc }
+                    } else {
+                        assert_eq!(bits, 64);
+                        Inst::FpuStore64 { rd, mem, srcloc }
+                    }
+                } else {
+                    unimplemented!("gen_store({})", ty);
+                }
+            }
        }
    }
 }
@@ -1736,7 +1754,7 @@ fn aarch64_get_regs(inst: &Inst, collector: &mut RegUsageCollector) {
            collector.add_def(rtmp1);
            collector.add_def(rtmp2);
        }
-        &Inst::LoadConst64 { rd, .. } | &Inst::LoadExtName { rd, .. } => {
+        &Inst::LoadExtName { rd, .. } => {
            collector.add_def(rd);
        }
        &Inst::LoadAddr { rd, mem: _ } => {
@@ -2427,9 +2445,6 @@ fn aarch64_map_regs<RUM: RegUsageMapper>(inst: &mut Inst, mapper: &RUM) {
            map_def(mapper, rtmp1);
            map_def(mapper, rtmp2);
        }
-        &mut Inst::LoadConst64 { ref mut rd, .. } => {
-            map_def(mapper, rd);
-        }
        &mut Inst::LoadExtName { ref mut rd, .. } => {
            map_def(mapper, rd);
        }
@@ -2632,7 +2647,6 @@ impl Inst {
                ALUOp::AddS64 => ("adds", OperandSize::Size64),
                ALUOp::SubS32 => ("subs", OperandSize::Size32),
                ALUOp::SubS64 => ("subs", OperandSize::Size64),
-                ALUOp::SubS64XR => ("subs", OperandSize::Size64),
                ALUOp::SMulH => ("smulh", OperandSize::Size64),
                ALUOp::UMulH => ("umulh", OperandSize::Size64),
                ALUOp::SDiv64 => ("sdiv", OperandSize::Size64),
@@ -3535,10 +3549,6 @@ impl Inst {
                    info.targets
                )
            }
-            &Inst::LoadConst64 { rd, const_data } => {
-                let rd = rd.show_rru(mb_rru);
-                format!("ldr {}, 8 ; b 12 ; data {:?}", rd, const_data)
-            }
            &Inst::LoadExtName {
                rd,
                ref name,
--- a/cranelift/filetests/filetests/vcode/aarch64/stack-limit.clif
+++ b/cranelift/filetests/filetests/vcode/aarch64/stack-limit.clif
@@ -105,7 +105,8 @@ block0(v0: i64):
 ; nextln:     add x16, x0, x17, UXTX
 ; nextln:     subs xzr, sp, x16
 ; nextln:     b.hs 8 ; udf
-; nextln:     ldr x16, 8 ; b 12 ; data 400000
+; nextln:     movz w16, #6784
+; nextln:     movk w16, #6, LSL #16
 ; nextln:     sub sp, sp, x16, UXTX
 ; nextln:     mov sp, fp
 ; nextln:     ldp fp, lr, [sp], #16
@@ -154,7 +155,8 @@ block0(v0: i64):
 ; nextln:     add x16, x16, x17, UXTX
 ; nextln:     subs xzr, sp, x16
 ; nextln:     b.hs 8 ; udf
-; nextln:     ldr x16, 8 ; b 12 ; data 400000
+; nextln:     movz w16, #6784
+; nextln:     movk w16, #6, LSL #16
 ; nextln:     sub sp, sp, x16, UXTX
 ; nextln:     mov sp, fp
 ; nextln:     ldp fp, lr, [sp], #16
--- a/cranelift/filetests/filetests/vcode/aarch64/stack.clif
+++ b/cranelift/filetests/filetests/vcode/aarch64/stack.clif
@@ -29,7 +29,8 @@ block0:

 ; check: stp fp, lr, [sp, #-16]!
 ; nextln: mov fp, sp
-; nextln: ldr x16, 8 ; b 12 ; data 100016
+; nextln: movz w16, #34480
+; nextln: movk w16, #1, LSL #16
 ; nextln: sub sp, sp, x16, UXTX
 ; nextln: mov x0, sp
 ; nextln: mov sp, fp
@@ -68,7 +69,8 @@ block0:

 ; check: stp fp, lr, [sp, #-16]!
 ; nextln: mov fp, sp
-; nextln: ldr x16, 8 ; b 12 ; data 100016
+; nextln: movz w16, #34480
+; nextln: movk w16, #1, LSL #16
 ; nextln: sub sp, sp, x16, UXTX
 ; nextln: mov x0, sp
 ; nextln: ldr x0, [x0]
@@ -106,7 +108,8 @@ block0(v0: i64):

 ; check: stp fp, lr, [sp, #-16]!
 ; nextln: mov fp, sp
-; nextln: ldr x16, 8 ; b 12 ; data 100016
+; nextln: movz w16, #34480
+; nextln: movk w16, #1, LSL #16
 ; nextln: sub sp, sp, x16, UXTX
 ; nextln: mov x1, sp
 ; nextln: str x0, [x1]