Vector bitcast support (AArch64 & Interpreter) (#4820)

* Vector bitcast support (AArch64 & Interpreter): Implemented support for `bitcast` on vector values for AArch64 and the interpreter. Also corrected the verifier to ensure that the sizes, in bits, of the input and output types match for a `bitcast`, per the docs. Copyright (c) 2022 Arm Limited
* `I128` same-type bitcast support. Copyright (c) 2022 Arm Limited
* Directly return input for 64-bit GPR<=>GPR bitcast. Copyright (c) 2022 Arm Limited
2022-09-21 17:20:28 +01:00
parent 05cbd667c7
commit e786bda002
15 changed files with 478 additions and 26 deletions
--- a/cranelift/codegen/src/isa/aarch64/inst.isle
+++ b/cranelift/codegen/src/isa/aarch64/inst.isle
@@ -1738,6 +1738,13 @@
 (decl writable_zero_reg () WritableReg)
 (extern constructor writable_zero_reg writable_zero_reg)

+;; Helper for emitting `MInst.Mov` instructions: emits a `Mov` from `src` into a fresh `$I64` temporary, with the operand size derived from `ty`, and returns that temporary.
+(decl mov (Reg Type) Reg)
+(rule (mov src ty)
+      (let ((dst WritableReg (temp_writable_reg $I64))
+            (_ Unit (emit (MInst.Mov (operand_size ty) dst src))))
+        dst))
+
 ;; Helper for emitting `MInst.MovZ` instructions.
 (decl movz (MoveWideConst OperandSize) Reg)
 (rule (movz imm size)
@@ -2093,6 +2100,17 @@
            (_ Unit (emit (MInst.FpuRound op dst rn))))
        dst))

+;; Helper for emitting `MInst.FpuMove64` and `MInst.FpuMove128` instructions: the `fits_in_64` rule emits `FpuMove64` into an `$F64` temporary; the catch-all rule emits `FpuMove128` into an `$I8X16` temporary.
+(decl fpu_move (Type Reg) Reg)
+(rule (fpu_move _ src)
+      (let ((dst WritableReg (temp_writable_reg $I8X16))
+            (_ Unit (emit (MInst.FpuMove128 dst src))))
+        dst))
+(rule (fpu_move (fits_in_64 _) src)
+      (let ((dst WritableReg (temp_writable_reg $F64))
+            (_ Unit (emit (MInst.FpuMove64 dst src))))
+        dst))
+
 ;; Helper for emitting `MInst.MovToFpu` instructions.
 (decl mov_to_fpu (Reg ScalarSize) Reg)
 (rule (mov_to_fpu x size)
--- a/cranelift/codegen/src/isa/aarch64/lower.isle
+++ b/cranelift/codegen/src/isa/aarch64/lower.isle
@@ -2209,17 +2209,26 @@

 ;;; Rules for `bitcast` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

-(rule (lower (has_type $I32 (bitcast src @ (value_type $F32))))
-      (mov_from_vec src 0 (ScalarSize.Size32)))
+; SIMD&FP <=> SIMD&FP: both input and output types match `ty_float_or_vec`; lowered with `fpu_move` keyed on the output type
+(rule (lower (has_type (ty_float_or_vec out_ty) (bitcast x @ (value_type (ty_float_or_vec _)))))
+      (fpu_move out_ty x))

-(rule (lower (has_type $F32 (bitcast src @ (value_type $I32))))
-      (mov_to_fpu src (ScalarSize.Size32)))
+; GPR => SIMD&FP: input type accepted by `ty_int_bool_ref_scalar_64`, output type matches `ty_float_or_vec`; lowered with `mov_to_fpu` at the input type's scalar size
+(rule (lower (has_type (ty_float_or_vec _) (bitcast x @ (value_type in_ty))))
+      (if (ty_int_bool_ref_scalar_64 in_ty))
+      (mov_to_fpu x (scalar_size in_ty)))

-(rule (lower (has_type $I64 (bitcast src @ (value_type $F64))))
-      (mov_from_vec src 0 (ScalarSize.Size64)))
+; SIMD&FP => GPR: input type matches `fits_in_64` and `ty_float_or_vec`, output type accepted by `ty_int_bool_ref_scalar_64`; lowered with `mov_from_vec x 0` at the output type's scalar size
+(rule (lower (has_type out_ty (bitcast x @ (value_type (fits_in_64 (ty_float_or_vec _))))))
+      (if (ty_int_bool_ref_scalar_64 out_ty))
+      (mov_from_vec x 0 (scalar_size out_ty)))

-(rule (lower (has_type $F64 (bitcast src @ (value_type $I64))))
-      (mov_to_fpu src (ScalarSize.Size64)))
+; GPR <=> GPR: both types accepted by `ty_int_bool_ref_scalar_64`; the rule result is the input `x` itself (the same-type `$I128` rule that follows also returns `x`)
+(rule (lower (has_type out_ty (bitcast x @ (value_type in_ty))))
+      (if (ty_int_bool_ref_scalar_64 out_ty))
+      (if (ty_int_bool_ref_scalar_64 in_ty))
+      x)
+(rule (lower (has_type $I128 (bitcast x @ (value_type $I128)))) x)

 ;;; Rules for `raw_bitcast` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

--- a/cranelift/codegen/src/isa/aarch64/lower_inst.rs
+++ b/cranelift/codegen/src/isa/aarch64/lower_inst.rs
@@ -268,12 +268,7 @@ pub(crate) fn lower_insn_to_regs(

        Opcode::IsNull | Opcode::IsInvalid => implemented_in_isle(ctx),

-        Opcode::Copy => {
-            let rd = get_output_reg(ctx, outputs[0]).only_reg().unwrap();
-            let rn = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None);
-            let ty = ctx.input_ty(insn, 0);
-            ctx.emit(Inst::gen_move(rd, rn, ty));
-        }
+        Opcode::Copy => implemented_in_isle(ctx),

        Opcode::Breduce | Opcode::Ireduce => implemented_in_isle(ctx),

--- a/cranelift/codegen/src/verifier/mod.rs
+++ b/cranelift/codegen/src/verifier/mod.rs
@@ -1096,15 +1096,26 @@ impl<'a> Verifier<'a> {
        let typ = self.func.dfg.ctrl_typevar(inst);
        let value_type = self.func.dfg.value_type(arg);

-        if typ.lane_bits() < value_type.lane_bits() {
+        if typ.lane_bits() != value_type.lane_bits() {
            errors.fatal((
                inst,
                format!(
-                    "The bitcast argument {} doesn't fit in a type of {} bits",
+                    "The bitcast argument {} has a lane type of {} bits, which doesn't match an expected type of {} bits",
                    arg,
+                    value_type.lane_bits(),
                    typ.lane_bits()
                ),
            ))
+        } else if typ.bits() != value_type.bits() {
+            errors.fatal((
+                inst,
+                format!(
+                    "The bitcast argument {} has a type of {} bits, which doesn't match an expected type of {} bits",
+                    arg,
+                    value_type.bits(),
+                    typ.bits()
+                ),
+            ))
        } else {
            Ok(())
        }