diff --git a/build.rs b/build.rs
index 8792f6e4d7..2cbb4b3811 100644
--- a/build.rs
+++ b/build.rs
@@ -181,6 +181,7 @@ fn ignore(testsuite: &str, testname: &str, strategy: &str) -> bool {
         },
         "Cranelift" => match (testsuite, testname) {
             ("simd", "simd_address") => return false,
+            ("simd", "simd_bitwise") => return false,
             ("simd", "simd_i8x16_cmp") => return false,
             ("simd", "simd_i16x8_cmp") => return false,
             ("simd", "simd_i32x4_cmp") => return false,
diff --git a/cranelift/codegen/src/isa/aarch64/inst/emit.rs b/cranelift/codegen/src/isa/aarch64/inst/emit.rs
index e59eab6306..d4bf3055ed 100644
--- a/cranelift/codegen/src/isa/aarch64/inst/emit.rs
+++ b/cranelift/codegen/src/isa/aarch64/inst/emit.rs
@@ -1035,7 +1035,7 @@ impl MachInstEmit for Inst {
             &Inst::VecMisc { op, rd, rn, ty } => {
                 let bits_12_16 = match op {
                     VecMisc2::Not => {
-                        debug_assert_eq!(I8X16, ty);
+                        debug_assert_eq!(128, ty_bits(ty));
                        0b00101
                    }
                };
@@ -1256,6 +1256,28 @@ impl MachInstEmit for Inst {
                     VecALUOp::Cmgt => (0b010_01110_00_1 | enc_size_for_cmp << 1, 0b001101),
                     VecALUOp::Cmhi => (0b011_01110_00_1 | enc_size_for_cmp << 1, 0b001101),
                     VecALUOp::Cmhs => (0b011_01110_00_1 | enc_size_for_cmp << 1, 0b001111),
+                    // The following instructions operate on bytes, so are not encoded differently
+                    // for the different vector types.
+                    VecALUOp::And => {
+                        debug_assert_eq!(128, ty_bits(ty));
+                        (0b010_01110_00_1, 0b000111)
+                    }
+                    VecALUOp::Bic => {
+                        debug_assert_eq!(128, ty_bits(ty));
+                        (0b010_01110_01_1, 0b000111)
+                    }
+                    VecALUOp::Orr => {
+                        debug_assert_eq!(128, ty_bits(ty));
+                        (0b010_01110_10_1, 0b000111)
+                    }
+                    VecALUOp::Eor => {
+                        debug_assert_eq!(128, ty_bits(ty));
+                        (0b011_01110_00_1, 0b000111)
+                    }
+                    VecALUOp::Bsl => {
+                        debug_assert_eq!(128, ty_bits(ty));
+                        (0b011_01110_01_1, 0b000111)
+                    }
                 };
                 sink.put4(enc_vec_rrr(top11, rm, bit15_10, rn, rd));
             }
diff --git a/cranelift/codegen/src/isa/aarch64/inst/emit_tests.rs b/cranelift/codegen/src/isa/aarch64/inst/emit_tests.rs
index 01bb2c38e1..0cb54ceced 100644
--- a/cranelift/codegen/src/isa/aarch64/inst/emit_tests.rs
+++ b/cranelift/codegen/src/isa/aarch64/inst/emit_tests.rs
@@ -2191,12 +2191,72 @@ fn test_aarch64_binemit() {
         "cmhs v8.4s, v2.4s, v15.4s",
     ));
 
+    insns.push((
+        Inst::VecRRR {
+            alu_op: VecALUOp::And,
+            rd: writable_vreg(20),
+            rn: vreg(19),
+            rm: vreg(18),
+            ty: I32X4,
+        },
+        "741E324E",
+        "and v20.16b, v19.16b, v18.16b",
+    ));
+
+    insns.push((
+        Inst::VecRRR {
+            alu_op: VecALUOp::Bic,
+            rd: writable_vreg(8),
+            rn: vreg(11),
+            rm: vreg(1),
+            ty: I8X16,
+        },
+        "681D614E",
+        "bic v8.16b, v11.16b, v1.16b",
+    ));
+
+    insns.push((
+        Inst::VecRRR {
+            alu_op: VecALUOp::Orr,
+            rd: writable_vreg(15),
+            rn: vreg(2),
+            rm: vreg(12),
+            ty: I16X8,
+        },
+        "4F1CAC4E",
+        "orr v15.16b, v2.16b, v12.16b",
+    ));
+
+    insns.push((
+        Inst::VecRRR {
+            alu_op: VecALUOp::Eor,
+            rd: writable_vreg(18),
+            rn: vreg(3),
+            rm: vreg(22),
+            ty: I8X16,
+        },
+        "721C366E",
+        "eor v18.16b, v3.16b, v22.16b",
+    ));
+
+    insns.push((
+        Inst::VecRRR {
+            alu_op: VecALUOp::Bsl,
+            rd: writable_vreg(8),
+            rn: vreg(9),
+            rm: vreg(1),
+            ty: I8X16,
+        },
+        "281D616E",
+        "bsl v8.16b, v9.16b, v1.16b",
+    ));
+
     insns.push((
         Inst::VecMisc {
             op: VecMisc2::Not,
             rd: writable_vreg(2),
             rn: vreg(1),
-            ty: I8X16,
+            ty: I32X4,
         },
         "2258206E",
         "mvn v2.16b, v1.16b",
diff --git a/cranelift/codegen/src/isa/aarch64/inst/mod.rs b/cranelift/codegen/src/isa/aarch64/inst/mod.rs
index 1486490c55..831481814c 100644
--- a/cranelift/codegen/src/isa/aarch64/inst/mod.rs
+++ b/cranelift/codegen/src/isa/aarch64/inst/mod.rs
@@ -225,6 +225,16 @@ pub enum VecALUOp {
     Cmhs,
     /// Compare unsigned higher or same
     Cmhi,
+    /// Bitwise and
+    And,
+    /// Bitwise bit clear
+    Bic,
+    /// Bitwise inclusive or
+    Orr,
+    /// Bitwise exclusive or
+    Eor,
+    /// Bitwise select
+    Bsl,
 }
 
 /// A Vector miscellaneous operation with two registers.
@@ -1273,8 +1283,14 @@ fn aarch64_get_regs(inst: &Inst, collector: &mut RegUsageCollector) {
             collector.add_def(rd);
             collector.add_use(rn);
         }
-        &Inst::VecRRR { rd, rn, rm, .. } => {
-            collector.add_def(rd);
+        &Inst::VecRRR {
+            alu_op, rd, rn, rm, ..
+        } => {
+            if alu_op == VecALUOp::Bsl {
+                collector.add_mod(rd);
+            } else {
+                collector.add_def(rd);
+            }
             collector.add_use(rn);
             collector.add_use(rm);
         }
@@ -1851,12 +1867,17 @@ fn aarch64_map_regs<RUM: RegUsageMapper>(inst: &mut Inst, mapper: &RUM) {
             map_use(mapper, rn);
         }
         &mut Inst::VecRRR {
+            alu_op,
             ref mut rd,
             ref mut rn,
             ref mut rm,
             ..
         } => {
-            map_def(mapper, rd);
+            if alu_op == VecALUOp::Bsl {
+                map_mod(mapper, rd);
+            } else {
+                map_def(mapper, rd);
+            }
             map_use(mapper, rn);
             map_use(mapper, rm);
         }
@@ -2663,16 +2684,21 @@ impl ShowWithRRU for Inst {
                 alu_op,
                 ty,
             } => {
-                let (op, vector) = match alu_op {
-                    VecALUOp::SQAddScalar => ("sqadd", false),
-                    VecALUOp::UQAddScalar => ("uqadd", false),
-                    VecALUOp::SQSubScalar => ("sqsub", false),
-                    VecALUOp::UQSubScalar => ("uqsub", false),
-                    VecALUOp::Cmeq => ("cmeq", true),
-                    VecALUOp::Cmge => ("cmge", true),
-                    VecALUOp::Cmgt => ("cmgt", true),
-                    VecALUOp::Cmhs => ("cmhs", true),
-                    VecALUOp::Cmhi => ("cmhi", true),
+                let (op, vector, ty) = match alu_op {
+                    VecALUOp::SQAddScalar => ("sqadd", false, ty),
+                    VecALUOp::UQAddScalar => ("uqadd", false, ty),
+                    VecALUOp::SQSubScalar => ("sqsub", false, ty),
+                    VecALUOp::UQSubScalar => ("uqsub", false, ty),
+                    VecALUOp::Cmeq => ("cmeq", true, ty),
+                    VecALUOp::Cmge => ("cmge", true, ty),
+                    VecALUOp::Cmgt => ("cmgt", true, ty),
+                    VecALUOp::Cmhs => ("cmhs", true, ty),
+                    VecALUOp::Cmhi => ("cmhi", true, ty),
+                    VecALUOp::And => ("and", true, I8X16),
+                    VecALUOp::Bic => ("bic", true, I8X16),
+                    VecALUOp::Orr => ("orr", true, I8X16),
+                    VecALUOp::Eor => ("eor", true, I8X16),
+                    VecALUOp::Bsl => ("bsl", true, I8X16),
                 };
                 let show_vreg_fn: fn(Reg, Option<&RealRegUniverse>, Type) -> String =
                     if vector {
@@ -2686,9 +2712,14 @@ impl ShowWithRRU for Inst {
                 let rm = show_vreg_fn(rm, mb_rru, ty);
                 format!("{} {}, {}, {}", op, rd, rn, rm)
             }
-            &Inst::VecMisc { op, rd, rn, ty } => {
-                let op = match op {
-                    VecMisc2::Not => "mvn",
+            &Inst::VecMisc {
+                op,
+                rd,
+                rn,
+                ty: _ty,
+            } => {
+                let (op, ty) = match op {
+                    VecMisc2::Not => ("mvn", I8X16),
                 };
 
                 let rd = show_vreg_vector(rd.to_reg(), mb_rru, ty);
diff --git a/cranelift/codegen/src/isa/aarch64/lower_inst.rs b/cranelift/codegen/src/isa/aarch64/lower_inst.rs
index 391773c472..bce7276552 100644
--- a/cranelift/codegen/src/isa/aarch64/lower_inst.rs
+++ b/cranelift/codegen/src/isa/aarch64/lower_inst.rs
@@ -386,11 +386,21 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
 
         Opcode::Bnot => {
             let rd = output_to_reg(ctx, outputs[0]);
-            let rm = input_to_rs_immlogic(ctx, inputs[0], NarrowValueMode::None);
             let ty = ty.unwrap();
-            let alu_op = choose_32_64(ty, ALUOp::OrrNot32, ALUOp::OrrNot64);
-            // NOT rd, rm ==> ORR_NOT rd, zero, rm
-            ctx.emit(alu_inst_immlogic(alu_op, rd, zero_reg(), rm));
+            if ty_bits(ty) < 128 {
+                let rm = input_to_rs_immlogic(ctx, inputs[0], NarrowValueMode::None);
+                let alu_op = choose_32_64(ty, ALUOp::OrrNot32, ALUOp::OrrNot64);
+                // NOT rd, rm ==> ORR_NOT rd, zero, rm
+                ctx.emit(alu_inst_immlogic(alu_op, rd, zero_reg(), rm));
+            } else {
+                let rm = input_to_reg(ctx, inputs[0], NarrowValueMode::None);
+                ctx.emit(Inst::VecMisc {
+                    op: VecMisc2::Not,
+                    rd,
+                    rn: rm,
+                    ty,
+                });
+            }
         }
 
         Opcode::Band
@@ -400,19 +410,41 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
         | Opcode::BorNot
         | Opcode::BxorNot => {
             let rd = output_to_reg(ctx, outputs[0]);
-            let rn = input_to_reg(ctx, inputs[0], NarrowValueMode::None);
-            let rm = input_to_rs_immlogic(ctx, inputs[1], NarrowValueMode::None);
             let ty = ty.unwrap();
-            let alu_op = match op {
-                Opcode::Band => choose_32_64(ty, ALUOp::And32, ALUOp::And64),
-                Opcode::Bor => choose_32_64(ty, ALUOp::Orr32, ALUOp::Orr64),
-                Opcode::Bxor => choose_32_64(ty, ALUOp::Eor32, ALUOp::Eor64),
-                Opcode::BandNot => choose_32_64(ty, ALUOp::AndNot32, ALUOp::AndNot64),
-                Opcode::BorNot => choose_32_64(ty, ALUOp::OrrNot32, ALUOp::OrrNot64),
-                Opcode::BxorNot => choose_32_64(ty, ALUOp::EorNot32, ALUOp::EorNot64),
-                _ => unreachable!(),
-            };
-            ctx.emit(alu_inst_immlogic(alu_op, rd, rn, rm));
+            if ty_bits(ty) < 128 {
+                let rn = input_to_reg(ctx, inputs[0], NarrowValueMode::None);
+                let rm = input_to_rs_immlogic(ctx, inputs[1], NarrowValueMode::None);
+                let alu_op = match op {
+                    Opcode::Band => choose_32_64(ty, ALUOp::And32, ALUOp::And64),
+                    Opcode::Bor => choose_32_64(ty, ALUOp::Orr32, ALUOp::Orr64),
+                    Opcode::Bxor => choose_32_64(ty, ALUOp::Eor32, ALUOp::Eor64),
+                    Opcode::BandNot => choose_32_64(ty, ALUOp::AndNot32, ALUOp::AndNot64),
+                    Opcode::BorNot => choose_32_64(ty, ALUOp::OrrNot32, ALUOp::OrrNot64),
+                    Opcode::BxorNot => choose_32_64(ty, ALUOp::EorNot32, ALUOp::EorNot64),
+                    _ => unreachable!(),
+                };
+                ctx.emit(alu_inst_immlogic(alu_op, rd, rn, rm));
+            } else {
+                let alu_op = match op {
+                    Opcode::Band => VecALUOp::And,
+                    Opcode::BandNot => VecALUOp::Bic,
+                    Opcode::Bor => VecALUOp::Orr,
+                    Opcode::Bxor => VecALUOp::Eor,
+                    _ => unreachable!(),
+                };
+
+                let rn = input_to_reg(ctx, inputs[0], NarrowValueMode::None);
+                let rm = input_to_reg(ctx, inputs[1], NarrowValueMode::None);
+                let rd = output_to_reg(ctx, outputs[0]);
+
+                ctx.emit(Inst::VecRRR {
+                    alu_op,
+                    rd,
+                    rn,
+                    rm,
+                    ty,
+                });
+            }
         }
 
         Opcode::Ishl | Opcode::Ushr | Opcode::Sshr => {
@@ -1035,32 +1067,49 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
         }
 
         Opcode::Bitselect => {
-            let tmp = ctx.alloc_tmp(RegClass::I64, I64);
-            let rd = output_to_reg(ctx, outputs[0]);
-            let rcond = input_to_reg(ctx, inputs[0], NarrowValueMode::None);
-            let rn = input_to_reg(ctx, inputs[1], NarrowValueMode::None);
-            let rm = input_to_reg(ctx, inputs[2], NarrowValueMode::None);
-            // AND rTmp, rn, rcond
-            ctx.emit(Inst::AluRRR {
-                alu_op: ALUOp::And64,
-                rd: tmp,
-                rn,
-                rm: rcond,
-            });
-            // BIC rd, rm, rcond
-            ctx.emit(Inst::AluRRR {
-                alu_op: ALUOp::AndNot64,
-                rd,
-                rn: rm,
-                rm: rcond,
-            });
-            // ORR rd, rd, rTmp
-            ctx.emit(Inst::AluRRR {
-                alu_op: ALUOp::Orr64,
-                rd,
-                rn: rd.to_reg(),
-                rm: tmp.to_reg(),
-            });
+            let ty = ty.unwrap();
+            if ty_bits(ty) < 128 {
+                let tmp = ctx.alloc_tmp(RegClass::I64, I64);
+                let rd = output_to_reg(ctx, outputs[0]);
+                let rcond = input_to_reg(ctx, inputs[0], NarrowValueMode::None);
+                let rn = input_to_reg(ctx, inputs[1], NarrowValueMode::None);
+                let rm = input_to_reg(ctx, inputs[2], NarrowValueMode::None);
+                // AND rTmp, rn, rcond
+                ctx.emit(Inst::AluRRR {
+                    alu_op: ALUOp::And64,
+                    rd: tmp,
+                    rn,
+                    rm: rcond,
+                });
+                // BIC rd, rm, rcond
+                ctx.emit(Inst::AluRRR {
+                    alu_op: ALUOp::AndNot64,
+                    rd,
+                    rn: rm,
+                    rm: rcond,
+                });
+                // ORR rd, rd, rTmp
+                ctx.emit(Inst::AluRRR {
+                    alu_op: ALUOp::Orr64,
+                    rd,
+                    rn: rd.to_reg(),
+                    rm: tmp.to_reg(),
+                });
+            } else {
+                let rcond = input_to_reg(ctx, inputs[0], NarrowValueMode::None);
+                let rn = input_to_reg(ctx, inputs[1], NarrowValueMode::None);
+                let rm = input_to_reg(ctx, inputs[2], NarrowValueMode::None);
+                let rd = output_to_reg(ctx, outputs[0]);
+                ctx.emit(Inst::gen_move(rd, rcond, ty));
+
+                ctx.emit(Inst::VecRRR {
+                    alu_op: VecALUOp::Bsl,
+                    rd,
+                    rn,
+                    rm,
+                    ty,
+                });
+            }
         }
 
         Opcode::Trueif => {