arm64: Implement SIMD bitwise operations
Copyright (c) 2020, Arm Limited.
This commit is contained in:
1
build.rs
1
build.rs
@@ -181,6 +181,7 @@ fn ignore(testsuite: &str, testname: &str, strategy: &str) -> bool {
|
||||
},
|
||||
"Cranelift" => match (testsuite, testname) {
|
||||
("simd", "simd_address") => return false,
|
||||
("simd", "simd_bitwise") => return false,
|
||||
("simd", "simd_i8x16_cmp") => return false,
|
||||
("simd", "simd_i16x8_cmp") => return false,
|
||||
("simd", "simd_i32x4_cmp") => return false,
|
||||
|
||||
@@ -1035,7 +1035,7 @@ impl MachInstEmit for Inst {
|
||||
&Inst::VecMisc { op, rd, rn, ty } => {
|
||||
let bits_12_16 = match op {
|
||||
VecMisc2::Not => {
|
||||
debug_assert_eq!(I8X16, ty);
|
||||
debug_assert_eq!(128, ty_bits(ty));
|
||||
0b00101
|
||||
}
|
||||
};
|
||||
@@ -1256,6 +1256,28 @@ impl MachInstEmit for Inst {
|
||||
VecALUOp::Cmgt => (0b010_01110_00_1 | enc_size_for_cmp << 1, 0b001101),
|
||||
VecALUOp::Cmhi => (0b011_01110_00_1 | enc_size_for_cmp << 1, 0b001101),
|
||||
VecALUOp::Cmhs => (0b011_01110_00_1 | enc_size_for_cmp << 1, 0b001111),
|
||||
// The following instructions operate on bytes, so are not encoded differently
|
||||
// for the different vector types.
|
||||
VecALUOp::And => {
|
||||
debug_assert_eq!(128, ty_bits(ty));
|
||||
(0b010_01110_00_1, 0b000111)
|
||||
}
|
||||
VecALUOp::Bic => {
|
||||
debug_assert_eq!(128, ty_bits(ty));
|
||||
(0b010_01110_01_1, 0b000111)
|
||||
}
|
||||
VecALUOp::Orr => {
|
||||
debug_assert_eq!(128, ty_bits(ty));
|
||||
(0b010_01110_10_1, 0b000111)
|
||||
}
|
||||
VecALUOp::Eor => {
|
||||
debug_assert_eq!(128, ty_bits(ty));
|
||||
(0b011_01110_00_1, 0b000111)
|
||||
}
|
||||
VecALUOp::Bsl => {
|
||||
debug_assert_eq!(128, ty_bits(ty));
|
||||
(0b011_01110_01_1, 0b000111)
|
||||
}
|
||||
};
|
||||
sink.put4(enc_vec_rrr(top11, rm, bit15_10, rn, rd));
|
||||
}
|
||||
|
||||
@@ -2191,12 +2191,72 @@ fn test_aarch64_binemit() {
|
||||
"cmhs v8.4s, v2.4s, v15.4s",
|
||||
));
|
||||
|
||||
insns.push((
|
||||
Inst::VecRRR {
|
||||
alu_op: VecALUOp::And,
|
||||
rd: writable_vreg(20),
|
||||
rn: vreg(19),
|
||||
rm: vreg(18),
|
||||
ty: I32X4,
|
||||
},
|
||||
"741E324E",
|
||||
"and v20.16b, v19.16b, v18.16b",
|
||||
));
|
||||
|
||||
insns.push((
|
||||
Inst::VecRRR {
|
||||
alu_op: VecALUOp::Bic,
|
||||
rd: writable_vreg(8),
|
||||
rn: vreg(11),
|
||||
rm: vreg(1),
|
||||
ty: I8X16,
|
||||
},
|
||||
"681D614E",
|
||||
"bic v8.16b, v11.16b, v1.16b",
|
||||
));
|
||||
|
||||
insns.push((
|
||||
Inst::VecRRR {
|
||||
alu_op: VecALUOp::Orr,
|
||||
rd: writable_vreg(15),
|
||||
rn: vreg(2),
|
||||
rm: vreg(12),
|
||||
ty: I16X8,
|
||||
},
|
||||
"4F1CAC4E",
|
||||
"orr v15.16b, v2.16b, v12.16b",
|
||||
));
|
||||
|
||||
insns.push((
|
||||
Inst::VecRRR {
|
||||
alu_op: VecALUOp::Eor,
|
||||
rd: writable_vreg(18),
|
||||
rn: vreg(3),
|
||||
rm: vreg(22),
|
||||
ty: I8X16,
|
||||
},
|
||||
"721C366E",
|
||||
"eor v18.16b, v3.16b, v22.16b",
|
||||
));
|
||||
|
||||
insns.push((
|
||||
Inst::VecRRR {
|
||||
alu_op: VecALUOp::Bsl,
|
||||
rd: writable_vreg(8),
|
||||
rn: vreg(9),
|
||||
rm: vreg(1),
|
||||
ty: I8X16,
|
||||
},
|
||||
"281D616E",
|
||||
"bsl v8.16b, v9.16b, v1.16b",
|
||||
));
|
||||
|
||||
insns.push((
|
||||
Inst::VecMisc {
|
||||
op: VecMisc2::Not,
|
||||
rd: writable_vreg(2),
|
||||
rn: vreg(1),
|
||||
ty: I8X16,
|
||||
ty: I32X4,
|
||||
},
|
||||
"2258206E",
|
||||
"mvn v2.16b, v1.16b",
|
||||
|
||||
@@ -225,6 +225,16 @@ pub enum VecALUOp {
|
||||
Cmhs,
|
||||
/// Compare unsigned higher
|
||||
Cmhi,
|
||||
/// Bitwise and
|
||||
And,
|
||||
/// Bitwise bit clear
|
||||
Bic,
|
||||
/// Bitwise inclusive or
|
||||
Orr,
|
||||
/// Bitwise exclusive or
|
||||
Eor,
|
||||
/// Bitwise select
|
||||
Bsl,
|
||||
}
|
||||
|
||||
/// A Vector miscellaneous operation with two registers.
|
||||
@@ -1273,8 +1283,14 @@ fn aarch64_get_regs(inst: &Inst, collector: &mut RegUsageCollector) {
|
||||
collector.add_def(rd);
|
||||
collector.add_use(rn);
|
||||
}
|
||||
&Inst::VecRRR { rd, rn, rm, .. } => {
|
||||
&Inst::VecRRR {
|
||||
alu_op, rd, rn, rm, ..
|
||||
} => {
|
||||
if alu_op == VecALUOp::Bsl {
|
||||
collector.add_mod(rd);
|
||||
} else {
|
||||
collector.add_def(rd);
|
||||
}
|
||||
collector.add_use(rn);
|
||||
collector.add_use(rm);
|
||||
}
|
||||
@@ -1851,12 +1867,17 @@ fn aarch64_map_regs<RUM: RegUsageMapper>(inst: &mut Inst, mapper: &RUM) {
|
||||
map_use(mapper, rn);
|
||||
}
|
||||
&mut Inst::VecRRR {
|
||||
alu_op,
|
||||
ref mut rd,
|
||||
ref mut rn,
|
||||
ref mut rm,
|
||||
..
|
||||
} => {
|
||||
if alu_op == VecALUOp::Bsl {
|
||||
map_mod(mapper, rd);
|
||||
} else {
|
||||
map_def(mapper, rd);
|
||||
}
|
||||
map_use(mapper, rn);
|
||||
map_use(mapper, rm);
|
||||
}
|
||||
@@ -2663,16 +2684,21 @@ impl ShowWithRRU for Inst {
|
||||
alu_op,
|
||||
ty,
|
||||
} => {
|
||||
let (op, vector) = match alu_op {
|
||||
VecALUOp::SQAddScalar => ("sqadd", false),
|
||||
VecALUOp::UQAddScalar => ("uqadd", false),
|
||||
VecALUOp::SQSubScalar => ("sqsub", false),
|
||||
VecALUOp::UQSubScalar => ("uqsub", false),
|
||||
VecALUOp::Cmeq => ("cmeq", true),
|
||||
VecALUOp::Cmge => ("cmge", true),
|
||||
VecALUOp::Cmgt => ("cmgt", true),
|
||||
VecALUOp::Cmhs => ("cmhs", true),
|
||||
VecALUOp::Cmhi => ("cmhi", true),
|
||||
let (op, vector, ty) = match alu_op {
|
||||
VecALUOp::SQAddScalar => ("sqadd", false, ty),
|
||||
VecALUOp::UQAddScalar => ("uqadd", false, ty),
|
||||
VecALUOp::SQSubScalar => ("sqsub", false, ty),
|
||||
VecALUOp::UQSubScalar => ("uqsub", false, ty),
|
||||
VecALUOp::Cmeq => ("cmeq", true, ty),
|
||||
VecALUOp::Cmge => ("cmge", true, ty),
|
||||
VecALUOp::Cmgt => ("cmgt", true, ty),
|
||||
VecALUOp::Cmhs => ("cmhs", true, ty),
|
||||
VecALUOp::Cmhi => ("cmhi", true, ty),
|
||||
VecALUOp::And => ("and", true, I8X16),
|
||||
VecALUOp::Bic => ("bic", true, I8X16),
|
||||
VecALUOp::Orr => ("orr", true, I8X16),
|
||||
VecALUOp::Eor => ("eor", true, I8X16),
|
||||
VecALUOp::Bsl => ("bsl", true, I8X16),
|
||||
};
|
||||
|
||||
let show_vreg_fn: fn(Reg, Option<&RealRegUniverse>, Type) -> String = if vector {
|
||||
@@ -2686,9 +2712,14 @@ impl ShowWithRRU for Inst {
|
||||
let rm = show_vreg_fn(rm, mb_rru, ty);
|
||||
format!("{} {}, {}, {}", op, rd, rn, rm)
|
||||
}
|
||||
&Inst::VecMisc { op, rd, rn, ty } => {
|
||||
let op = match op {
|
||||
VecMisc2::Not => "mvn",
|
||||
&Inst::VecMisc {
|
||||
op,
|
||||
rd,
|
||||
rn,
|
||||
ty: _ty,
|
||||
} => {
|
||||
let (op, ty) = match op {
|
||||
VecMisc2::Not => ("mvn", I8X16),
|
||||
};
|
||||
|
||||
let rd = show_vreg_vector(rd.to_reg(), mb_rru, ty);
|
||||
|
||||
@@ -386,11 +386,21 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
|
||||
|
||||
Opcode::Bnot => {
|
||||
let rd = output_to_reg(ctx, outputs[0]);
|
||||
let rm = input_to_rs_immlogic(ctx, inputs[0], NarrowValueMode::None);
|
||||
let ty = ty.unwrap();
|
||||
if ty_bits(ty) < 128 {
|
||||
let rm = input_to_rs_immlogic(ctx, inputs[0], NarrowValueMode::None);
|
||||
let alu_op = choose_32_64(ty, ALUOp::OrrNot32, ALUOp::OrrNot64);
|
||||
// NOT rd, rm ==> ORR_NOT rd, zero, rm
|
||||
ctx.emit(alu_inst_immlogic(alu_op, rd, zero_reg(), rm));
|
||||
} else {
|
||||
let rm = input_to_reg(ctx, inputs[0], NarrowValueMode::None);
|
||||
ctx.emit(Inst::VecMisc {
|
||||
op: VecMisc2::Not,
|
||||
rd,
|
||||
rn: rm,
|
||||
ty,
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
Opcode::Band
|
||||
@@ -400,9 +410,10 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
|
||||
| Opcode::BorNot
|
||||
| Opcode::BxorNot => {
|
||||
let rd = output_to_reg(ctx, outputs[0]);
|
||||
let ty = ty.unwrap();
|
||||
if ty_bits(ty) < 128 {
|
||||
let rn = input_to_reg(ctx, inputs[0], NarrowValueMode::None);
|
||||
let rm = input_to_rs_immlogic(ctx, inputs[1], NarrowValueMode::None);
|
||||
let ty = ty.unwrap();
|
||||
let alu_op = match op {
|
||||
Opcode::Band => choose_32_64(ty, ALUOp::And32, ALUOp::And64),
|
||||
Opcode::Bor => choose_32_64(ty, ALUOp::Orr32, ALUOp::Orr64),
|
||||
@@ -413,6 +424,27 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
|
||||
_ => unreachable!(),
|
||||
};
|
||||
ctx.emit(alu_inst_immlogic(alu_op, rd, rn, rm));
|
||||
} else {
|
||||
let alu_op = match op {
|
||||
Opcode::Band => VecALUOp::And,
|
||||
Opcode::BandNot => VecALUOp::Bic,
|
||||
Opcode::Bor => VecALUOp::Orr,
|
||||
Opcode::Bxor => VecALUOp::Eor,
|
||||
_ => unreachable!(),
|
||||
};
|
||||
|
||||
let rn = input_to_reg(ctx, inputs[0], NarrowValueMode::None);
|
||||
let rm = input_to_reg(ctx, inputs[1], NarrowValueMode::None);
|
||||
let rd = output_to_reg(ctx, outputs[0]);
|
||||
|
||||
ctx.emit(Inst::VecRRR {
|
||||
alu_op,
|
||||
rd,
|
||||
rn,
|
||||
rm,
|
||||
ty,
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
Opcode::Ishl | Opcode::Ushr | Opcode::Sshr => {
|
||||
@@ -1035,6 +1067,8 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
|
||||
}
|
||||
|
||||
Opcode::Bitselect => {
|
||||
let ty = ty.unwrap();
|
||||
if ty_bits(ty) < 128 {
|
||||
let tmp = ctx.alloc_tmp(RegClass::I64, I64);
|
||||
let rd = output_to_reg(ctx, outputs[0]);
|
||||
let rcond = input_to_reg(ctx, inputs[0], NarrowValueMode::None);
|
||||
@@ -1061,6 +1095,21 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
|
||||
rn: rd.to_reg(),
|
||||
rm: tmp.to_reg(),
|
||||
});
|
||||
} else {
|
||||
let rcond = input_to_reg(ctx, inputs[0], NarrowValueMode::None);
|
||||
let rn = input_to_reg(ctx, inputs[1], NarrowValueMode::None);
|
||||
let rm = input_to_reg(ctx, inputs[2], NarrowValueMode::None);
|
||||
let rd = output_to_reg(ctx, outputs[0]);
|
||||
ctx.emit(Inst::gen_move(rd, rcond, ty));
|
||||
|
||||
ctx.emit(Inst::VecRRR {
|
||||
alu_op: VecALUOp::Bsl,
|
||||
rd,
|
||||
rn,
|
||||
rm,
|
||||
ty,
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
Opcode::Trueif => {
|
||||
|
||||
Reference in New Issue
Block a user