arm64: Implement SIMD bitwise operations
Copyright (c) 2020, Arm Limited.
build.rs
@@ -181,6 +181,7 @@ fn ignore(testsuite: &str, testname: &str, strategy: &str) -> bool {
         },
         "Cranelift" => match (testsuite, testname) {
             ("simd", "simd_address") => return false,
+            ("simd", "simd_bitwise") => return false,
             ("simd", "simd_i8x16_cmp") => return false,
             ("simd", "simd_i16x8_cmp") => return false,
             ("simd", "simd_i32x4_cmp") => return false,
@@ -1035,7 +1035,7 @@ impl MachInstEmit for Inst {
             &Inst::VecMisc { op, rd, rn, ty } => {
                 let bits_12_16 = match op {
                     VecMisc2::Not => {
-                        debug_assert_eq!(I8X16, ty);
+                        debug_assert_eq!(128, ty_bits(ty));
                        0b00101
                     }
                 };
@@ -1256,6 +1256,28 @@ impl MachInstEmit for Inst {
                     VecALUOp::Cmgt => (0b010_01110_00_1 | enc_size_for_cmp << 1, 0b001101),
                     VecALUOp::Cmhi => (0b011_01110_00_1 | enc_size_for_cmp << 1, 0b001101),
                     VecALUOp::Cmhs => (0b011_01110_00_1 | enc_size_for_cmp << 1, 0b001111),
+                    // The following instructions operate on bytes, so are not encoded differently
+                    // for the different vector types.
+                    VecALUOp::And => {
+                        debug_assert_eq!(128, ty_bits(ty));
+                        (0b010_01110_00_1, 0b000111)
+                    }
+                    VecALUOp::Bic => {
+                        debug_assert_eq!(128, ty_bits(ty));
+                        (0b010_01110_01_1, 0b000111)
+                    }
+                    VecALUOp::Orr => {
+                        debug_assert_eq!(128, ty_bits(ty));
+                        (0b010_01110_10_1, 0b000111)
+                    }
+                    VecALUOp::Eor => {
+                        debug_assert_eq!(128, ty_bits(ty));
+                        (0b011_01110_00_1, 0b000111)
+                    }
+                    VecALUOp::Bsl => {
+                        debug_assert_eq!(128, ty_bits(ty));
+                        (0b011_01110_01_1, 0b000111)
+                    }
                 };
                 sink.put4(enc_vec_rrr(top11, rm, bit15_10, rn, rd));
             }
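The (top11, bits 15-10) pairs above follow the AArch64 "three registers of the same type" vector layout that enc_vec_rrr packs into a single 32-bit word. As a sanity check, here is a minimal standalone sketch of that packing (the helper name and the explicit field layout are assumptions of this sketch, not code from the commit), verified against the and v20.16b, v19.16b, v18.16b expectation added in the binemit tests further down:

    // Assumed field layout: [31:21] top11 | [20:16] Rm | [15:10] bits | [9:5] Rn | [4:0] Rd.
    fn pack_vec_rrr(top11: u32, rm: u32, bits_15_10: u32, rn: u32, rd: u32) -> u32 {
        (top11 << 21) | (rm << 16) | (bits_15_10 << 10) | (rn << 5) | rd
    }

    fn main() {
        // AND (vector), Q = 1: top11 = 0b010_01110_00_1, bits 15-10 = 0b000111.
        let word = pack_vec_rrr(0b010_01110_00_1, 18, 0b000111, 19, 20);
        assert_eq!(word, 0x4E32_1E74);
        // Emitted little-endian this is exactly the "741E324E" expected for
        // and v20.16b, v19.16b, v18.16b in the test below.
        assert_eq!(word.to_le_bytes(), [0x74, 0x1E, 0x32, 0x4E]);
    }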
@@ -2191,12 +2191,72 @@ fn test_aarch64_binemit() {
         "cmhs v8.4s, v2.4s, v15.4s",
     ));

+    insns.push((
+        Inst::VecRRR {
+            alu_op: VecALUOp::And,
+            rd: writable_vreg(20),
+            rn: vreg(19),
+            rm: vreg(18),
+            ty: I32X4,
+        },
+        "741E324E",
+        "and v20.16b, v19.16b, v18.16b",
+    ));
+
+    insns.push((
+        Inst::VecRRR {
+            alu_op: VecALUOp::Bic,
+            rd: writable_vreg(8),
+            rn: vreg(11),
+            rm: vreg(1),
+            ty: I8X16,
+        },
+        "681D614E",
+        "bic v8.16b, v11.16b, v1.16b",
+    ));
+
+    insns.push((
+        Inst::VecRRR {
+            alu_op: VecALUOp::Orr,
+            rd: writable_vreg(15),
+            rn: vreg(2),
+            rm: vreg(12),
+            ty: I16X8,
+        },
+        "4F1CAC4E",
+        "orr v15.16b, v2.16b, v12.16b",
+    ));
+
+    insns.push((
+        Inst::VecRRR {
+            alu_op: VecALUOp::Eor,
+            rd: writable_vreg(18),
+            rn: vreg(3),
+            rm: vreg(22),
+            ty: I8X16,
+        },
+        "721C366E",
+        "eor v18.16b, v3.16b, v22.16b",
+    ));
+
+    insns.push((
+        Inst::VecRRR {
+            alu_op: VecALUOp::Bsl,
+            rd: writable_vreg(8),
+            rn: vreg(9),
+            rm: vreg(1),
+            ty: I8X16,
+        },
+        "281D616E",
+        "bsl v8.16b, v9.16b, v1.16b",
+    ));
+
     insns.push((
         Inst::VecMisc {
             op: VecMisc2::Not,
             rd: writable_vreg(2),
             rn: vreg(1),
-            ty: I8X16,
+            ty: I32X4,
         },
         "2258206E",
         "mvn v2.16b, v1.16b",
@@ -225,6 +225,16 @@ pub enum VecALUOp {
     Cmhs,
     /// Compare unsigned higher or same
     Cmhi,
+    /// Bitwise and
+    And,
+    /// Bitwise bit clear
+    Bic,
+    /// Bitwise inclusive or
+    Orr,
+    /// Bitwise exclusive or
+    Eor,
+    /// Bitwise select
+    Bsl,
 }

 /// A Vector miscellaneous operation with two registers.
@@ -1273,8 +1283,14 @@ fn aarch64_get_regs(inst: &Inst, collector: &mut RegUsageCollector) {
             collector.add_def(rd);
             collector.add_use(rn);
         }
-        &Inst::VecRRR { rd, rn, rm, .. } => {
+        &Inst::VecRRR {
+            alu_op, rd, rn, rm, ..
+        } => {
+            if alu_op == VecALUOp::Bsl {
+                collector.add_mod(rd);
+            } else {
                 collector.add_def(rd);
+            }
             collector.add_use(rn);
             collector.add_use(rm);
         }
@@ -1851,12 +1867,17 @@ fn aarch64_map_regs<RUM: RegUsageMapper>(inst: &mut Inst, mapper: &RUM) {
             map_use(mapper, rn);
         }
         &mut Inst::VecRRR {
+            alu_op,
             ref mut rd,
             ref mut rn,
             ref mut rm,
             ..
         } => {
+            if alu_op == VecALUOp::Bsl {
+                map_mod(mapper, rd);
+            } else {
                 map_def(mapper, rd);
+            }
             map_use(mapper, rn);
             map_use(mapper, rm);
         }
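BSL is the only new instruction whose destination is also an input: the register it writes starts out holding the selection mask. That is why the two register-allocation hooks above treat rd as modified (add_mod/map_mod) rather than defined for VecALUOp::Bsl, and why the Bitselect lowering later in this commit first moves the condition into rd. A rough reference model of the computation, written as plain Rust over the raw 128 bits (an illustration, not Cranelift code):

    // Reference semantics of BSL over the raw 128-bit value: each result bit
    // comes from rn where the corresponding bit of the old destination (the
    // mask) is 1, and from rm where it is 0, so the destination is read as
    // well as written.
    fn bsl_reference(rd_mask: u128, rn: u128, rm: u128) -> u128 {
        (rn & rd_mask) | (rm & !rd_mask)
    }

    fn main() {
        let mask = 0x0000_ffff_0000_ffff_0000_ffff_0000_ffffu128;
        let a = u128::MAX;
        let b = 0u128;
        // Bits are taken from a exactly where the mask is set.
        assert_eq!(bsl_reference(mask, a, b), mask);
    }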
@@ -2663,16 +2684,21 @@ impl ShowWithRRU for Inst {
                 alu_op,
                 ty,
             } => {
-                let (op, vector) = match alu_op {
-                    VecALUOp::SQAddScalar => ("sqadd", false),
-                    VecALUOp::UQAddScalar => ("uqadd", false),
-                    VecALUOp::SQSubScalar => ("sqsub", false),
-                    VecALUOp::UQSubScalar => ("uqsub", false),
-                    VecALUOp::Cmeq => ("cmeq", true),
-                    VecALUOp::Cmge => ("cmge", true),
-                    VecALUOp::Cmgt => ("cmgt", true),
-                    VecALUOp::Cmhs => ("cmhs", true),
-                    VecALUOp::Cmhi => ("cmhi", true),
+                let (op, vector, ty) = match alu_op {
+                    VecALUOp::SQAddScalar => ("sqadd", false, ty),
+                    VecALUOp::UQAddScalar => ("uqadd", false, ty),
+                    VecALUOp::SQSubScalar => ("sqsub", false, ty),
+                    VecALUOp::UQSubScalar => ("uqsub", false, ty),
+                    VecALUOp::Cmeq => ("cmeq", true, ty),
+                    VecALUOp::Cmge => ("cmge", true, ty),
+                    VecALUOp::Cmgt => ("cmgt", true, ty),
+                    VecALUOp::Cmhs => ("cmhs", true, ty),
+                    VecALUOp::Cmhi => ("cmhi", true, ty),
+                    VecALUOp::And => ("and", true, I8X16),
+                    VecALUOp::Bic => ("bic", true, I8X16),
+                    VecALUOp::Orr => ("orr", true, I8X16),
+                    VecALUOp::Eor => ("eor", true, I8X16),
+                    VecALUOp::Bsl => ("bsl", true, I8X16),
                 };

                 let show_vreg_fn: fn(Reg, Option<&RealRegUniverse>, Type) -> String = if vector {
@@ -2686,9 +2712,14 @@ impl ShowWithRRU for Inst {
                 let rm = show_vreg_fn(rm, mb_rru, ty);
                 format!("{} {}, {}, {}", op, rd, rn, rm)
             }
-            &Inst::VecMisc { op, rd, rn, ty } => {
-                let op = match op {
-                    VecMisc2::Not => "mvn",
+            &Inst::VecMisc {
+                op,
+                rd,
+                rn,
+                ty: _ty,
+            } => {
+                let (op, ty) = match op {
+                    VecMisc2::Not => ("mvn", I8X16),
                 };

                 let rd = show_vreg_vector(rd.to_reg(), mb_rru, ty);
@@ -386,11 +386,21 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(

         Opcode::Bnot => {
             let rd = output_to_reg(ctx, outputs[0]);
-            let rm = input_to_rs_immlogic(ctx, inputs[0], NarrowValueMode::None);
             let ty = ty.unwrap();
+            if ty_bits(ty) < 128 {
+                let rm = input_to_rs_immlogic(ctx, inputs[0], NarrowValueMode::None);
                 let alu_op = choose_32_64(ty, ALUOp::OrrNot32, ALUOp::OrrNot64);
                 // NOT rd, rm ==> ORR_NOT rd, zero, rm
                 ctx.emit(alu_inst_immlogic(alu_op, rd, zero_reg(), rm));
+            } else {
+                let rm = input_to_reg(ctx, inputs[0], NarrowValueMode::None);
+                ctx.emit(Inst::VecMisc {
+                    op: VecMisc2::Not,
+                    rd,
+                    rn: rm,
+                    ty,
+                });
+            }
         }

         Opcode::Band
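The scalar path still leans on the identity spelled out in the comment above: OR-NOT against the zero register is a plain bitwise NOT, while 128-bit types now take the vector NOT (mvn) route instead. A tiny check of that identity in ordinary Rust (illustrative only, not Cranelift code):

    // orr_not rd, xzr, rm computes 0 | !rm, which is simply !rm; this is the
    // identity behind lowering a scalar bnot to ORR_NOT with the zero register.
    fn orr_not_with_zero(rm: u64) -> u64 {
        0 | !rm
    }

    fn main() {
        let x = 0x00ff_00ff_f0f0_f0f0u64;
        assert_eq!(orr_not_with_zero(x), !x);
    }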
@@ -400,9 +410,10 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
         | Opcode::BorNot
         | Opcode::BxorNot => {
             let rd = output_to_reg(ctx, outputs[0]);
+            let ty = ty.unwrap();
+            if ty_bits(ty) < 128 {
                 let rn = input_to_reg(ctx, inputs[0], NarrowValueMode::None);
                 let rm = input_to_rs_immlogic(ctx, inputs[1], NarrowValueMode::None);
-            let ty = ty.unwrap();
                 let alu_op = match op {
                     Opcode::Band => choose_32_64(ty, ALUOp::And32, ALUOp::And64),
                     Opcode::Bor => choose_32_64(ty, ALUOp::Orr32, ALUOp::Orr64),
@@ -413,6 +424,27 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
                     _ => unreachable!(),
                 };
                 ctx.emit(alu_inst_immlogic(alu_op, rd, rn, rm));
+            } else {
+                let alu_op = match op {
+                    Opcode::Band => VecALUOp::And,
+                    Opcode::BandNot => VecALUOp::Bic,
+                    Opcode::Bor => VecALUOp::Orr,
+                    Opcode::Bxor => VecALUOp::Eor,
+                    _ => unreachable!(),
+                };
+
+                let rn = input_to_reg(ctx, inputs[0], NarrowValueMode::None);
+                let rm = input_to_reg(ctx, inputs[1], NarrowValueMode::None);
+                let rd = output_to_reg(ctx, outputs[0]);
+
+                ctx.emit(Inst::VecRRR {
+                    alu_op,
+                    rd,
+                    rn,
+                    rm,
+                    ty,
+                });
+            }
         }

         Opcode::Ishl | Opcode::Ushr | Opcode::Sshr => {
@@ -1035,6 +1067,8 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
         }

         Opcode::Bitselect => {
+            let ty = ty.unwrap();
+            if ty_bits(ty) < 128 {
                 let tmp = ctx.alloc_tmp(RegClass::I64, I64);
                 let rd = output_to_reg(ctx, outputs[0]);
                 let rcond = input_to_reg(ctx, inputs[0], NarrowValueMode::None);
@@ -1061,6 +1095,21 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
                     rn: rd.to_reg(),
                     rm: tmp.to_reg(),
                 });
+            } else {
+                let rcond = input_to_reg(ctx, inputs[0], NarrowValueMode::None);
+                let rn = input_to_reg(ctx, inputs[1], NarrowValueMode::None);
+                let rm = input_to_reg(ctx, inputs[2], NarrowValueMode::None);
+                let rd = output_to_reg(ctx, outputs[0]);
+                ctx.emit(Inst::gen_move(rd, rcond, ty));
+
+                ctx.emit(Inst::VecRRR {
+                    alu_op: VecALUOp::Bsl,
+                    rd,
+                    rn,
+                    rm,
+                    ty,
+                });
+            }
         }

         Opcode::Trueif => {