arm64: Implement SIMD bitwise operations
Copyright (c) 2020, Arm Limited.
build.rs
@@ -181,6 +181,7 @@ fn ignore(testsuite: &str, testname: &str, strategy: &str) -> bool {
         },
         "Cranelift" => match (testsuite, testname) {
             ("simd", "simd_address") => return false,
+            ("simd", "simd_bitwise") => return false,
             ("simd", "simd_i8x16_cmp") => return false,
             ("simd", "simd_i16x8_cmp") => return false,
             ("simd", "simd_i32x4_cmp") => return false,
@@ -1035,7 +1035,7 @@ impl MachInstEmit for Inst {
             &Inst::VecMisc { op, rd, rn, ty } => {
                 let bits_12_16 = match op {
                     VecMisc2::Not => {
-                        debug_assert_eq!(I8X16, ty);
+                        debug_assert_eq!(128, ty_bits(ty));
                        0b00101
                     }
                 };
@@ -1256,6 +1256,28 @@ impl MachInstEmit for Inst {
                     VecALUOp::Cmgt => (0b010_01110_00_1 | enc_size_for_cmp << 1, 0b001101),
                     VecALUOp::Cmhi => (0b011_01110_00_1 | enc_size_for_cmp << 1, 0b001101),
                     VecALUOp::Cmhs => (0b011_01110_00_1 | enc_size_for_cmp << 1, 0b001111),
+                    // The following instructions operate on bytes, so are not encoded differently
+                    // for the different vector types.
+                    VecALUOp::And => {
+                        debug_assert_eq!(128, ty_bits(ty));
+                        (0b010_01110_00_1, 0b000111)
+                    }
+                    VecALUOp::Bic => {
+                        debug_assert_eq!(128, ty_bits(ty));
+                        (0b010_01110_01_1, 0b000111)
+                    }
+                    VecALUOp::Orr => {
+                        debug_assert_eq!(128, ty_bits(ty));
+                        (0b010_01110_10_1, 0b000111)
+                    }
+                    VecALUOp::Eor => {
+                        debug_assert_eq!(128, ty_bits(ty));
+                        (0b011_01110_00_1, 0b000111)
+                    }
+                    VecALUOp::Bsl => {
+                        debug_assert_eq!(128, ty_bits(ty));
+                        (0b011_01110_01_1, 0b000111)
+                    }
                 };
                 sink.put4(enc_vec_rrr(top11, rm, bit15_10, rn, rd));
             }
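The (top11, bits 15-10) pairs above follow the AArch64 "three registers of the same type" vector layout that enc_vec_rrr packs into a single 32-bit word. As a sanity check, here is a minimal standalone sketch of that packing (the helper name and the explicit field layout are assumptions of this sketch, not code from the commit), verified against the and v20.16b, v19.16b, v18.16b expectation added in the binemit tests further down:

    // Assumed field layout: [31:21] top11 | [20:16] Rm | [15:10] bits | [9:5] Rn | [4:0] Rd.
    fn pack_vec_rrr(top11: u32, rm: u32, bits_15_10: u32, rn: u32, rd: u32) -> u32 {
        (top11 << 21) | (rm << 16) | (bits_15_10 << 10) | (rn << 5) | rd
    }

    fn main() {
        // AND (vector), Q = 1: top11 = 0b010_01110_00_1, bits 15-10 = 0b000111.
        let word = pack_vec_rrr(0b010_01110_00_1, 18, 0b000111, 19, 20);
        assert_eq!(word, 0x4E32_1E74);
        // Emitted little-endian this is exactly the "741E324E" expected for
        // and v20.16b, v19.16b, v18.16b in the test below.
        assert_eq!(word.to_le_bytes(), [0x74, 0x1E, 0x32, 0x4E]);
    }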
@@ -2191,12 +2191,72 @@ fn test_aarch64_binemit() {
         "cmhs v8.4s, v2.4s, v15.4s",
     ));

+    insns.push((
+        Inst::VecRRR {
+            alu_op: VecALUOp::And,
+            rd: writable_vreg(20),
+            rn: vreg(19),
+            rm: vreg(18),
+            ty: I32X4,
+        },
+        "741E324E",
+        "and v20.16b, v19.16b, v18.16b",
+    ));
+
+    insns.push((
+        Inst::VecRRR {
+            alu_op: VecALUOp::Bic,
+            rd: writable_vreg(8),
+            rn: vreg(11),
+            rm: vreg(1),
+            ty: I8X16,
+        },
+        "681D614E",
+        "bic v8.16b, v11.16b, v1.16b",
+    ));
+
+    insns.push((
+        Inst::VecRRR {
+            alu_op: VecALUOp::Orr,
+            rd: writable_vreg(15),
+            rn: vreg(2),
+            rm: vreg(12),
+            ty: I16X8,
+        },
+        "4F1CAC4E",
+        "orr v15.16b, v2.16b, v12.16b",
+    ));
+
+    insns.push((
+        Inst::VecRRR {
+            alu_op: VecALUOp::Eor,
+            rd: writable_vreg(18),
+            rn: vreg(3),
+            rm: vreg(22),
+            ty: I8X16,
+        },
+        "721C366E",
+        "eor v18.16b, v3.16b, v22.16b",
+    ));
+
+    insns.push((
+        Inst::VecRRR {
+            alu_op: VecALUOp::Bsl,
+            rd: writable_vreg(8),
+            rn: vreg(9),
+            rm: vreg(1),
+            ty: I8X16,
+        },
+        "281D616E",
+        "bsl v8.16b, v9.16b, v1.16b",
+    ));
+
     insns.push((
         Inst::VecMisc {
             op: VecMisc2::Not,
             rd: writable_vreg(2),
             rn: vreg(1),
-            ty: I8X16,
+            ty: I32X4,
         },
         "2258206E",
         "mvn v2.16b, v1.16b",
@@ -225,6 +225,16 @@ pub enum VecALUOp {
     Cmhs,
     /// Compare unsigned higher or same
     Cmhi,
+    /// Bitwise and
+    And,
+    /// Bitwise bit clear
+    Bic,
+    /// Bitwise inclusive or
+    Orr,
+    /// Bitwise exclusive or
+    Eor,
+    /// Bitwise select
+    Bsl,
 }

 /// A Vector miscellaneous operation with two registers.
@@ -1273,8 +1283,14 @@ fn aarch64_get_regs(inst: &Inst, collector: &mut RegUsageCollector) {
             collector.add_def(rd);
             collector.add_use(rn);
         }
-        &Inst::VecRRR { rd, rn, rm, .. } => {
+        &Inst::VecRRR {
+            alu_op, rd, rn, rm, ..
+        } => {
+            if alu_op == VecALUOp::Bsl {
+                collector.add_mod(rd);
+            } else {
                 collector.add_def(rd);
+            }
             collector.add_use(rn);
             collector.add_use(rm);
         }
@@ -1851,12 +1867,17 @@ fn aarch64_map_regs<RUM: RegUsageMapper>(inst: &mut Inst, mapper: &RUM) {
             map_use(mapper, rn);
         }
         &mut Inst::VecRRR {
+            alu_op,
             ref mut rd,
             ref mut rn,
             ref mut rm,
             ..
         } => {
+            if alu_op == VecALUOp::Bsl {
+                map_mod(mapper, rd);
+            } else {
                 map_def(mapper, rd);
+            }
             map_use(mapper, rn);
             map_use(mapper, rm);
         }
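BSL is the only new instruction whose destination is also an input: the register it writes starts out holding the selection mask. That is why the two register-allocation hooks above treat rd as modified (add_mod/map_mod) rather than defined for VecALUOp::Bsl, and why the Bitselect lowering later in this commit first moves the condition into rd. A rough reference model of the computation, written as plain Rust over the raw 128 bits (an illustration, not Cranelift code):

    // Reference semantics of BSL over the raw 128-bit value: each result bit
    // comes from rn where the corresponding bit of the old destination (the
    // mask) is 1, and from rm where it is 0, so the destination is read as
    // well as written.
    fn bsl_reference(rd_mask: u128, rn: u128, rm: u128) -> u128 {
        (rn & rd_mask) | (rm & !rd_mask)
    }

    fn main() {
        let mask = 0x0000_ffff_0000_ffff_0000_ffff_0000_ffffu128;
        let a = u128::MAX;
        let b = 0u128;
        // Bits are taken from a exactly where the mask is set.
        assert_eq!(bsl_reference(mask, a, b), mask);
    }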
@@ -2663,16 +2684,21 @@ impl ShowWithRRU for Inst {
                 alu_op,
                 ty,
             } => {
-                let (op, vector) = match alu_op {
-                    VecALUOp::SQAddScalar => ("sqadd", false),
-                    VecALUOp::UQAddScalar => ("uqadd", false),
-                    VecALUOp::SQSubScalar => ("sqsub", false),
-                    VecALUOp::UQSubScalar => ("uqsub", false),
-                    VecALUOp::Cmeq => ("cmeq", true),
-                    VecALUOp::Cmge => ("cmge", true),
-                    VecALUOp::Cmgt => ("cmgt", true),
-                    VecALUOp::Cmhs => ("cmhs", true),
-                    VecALUOp::Cmhi => ("cmhi", true),
+                let (op, vector, ty) = match alu_op {
+                    VecALUOp::SQAddScalar => ("sqadd", false, ty),
+                    VecALUOp::UQAddScalar => ("uqadd", false, ty),
+                    VecALUOp::SQSubScalar => ("sqsub", false, ty),
+                    VecALUOp::UQSubScalar => ("uqsub", false, ty),
+                    VecALUOp::Cmeq => ("cmeq", true, ty),
+                    VecALUOp::Cmge => ("cmge", true, ty),
+                    VecALUOp::Cmgt => ("cmgt", true, ty),
+                    VecALUOp::Cmhs => ("cmhs", true, ty),
+                    VecALUOp::Cmhi => ("cmhi", true, ty),
+                    VecALUOp::And => ("and", true, I8X16),
+                    VecALUOp::Bic => ("bic", true, I8X16),
+                    VecALUOp::Orr => ("orr", true, I8X16),
+                    VecALUOp::Eor => ("eor", true, I8X16),
+                    VecALUOp::Bsl => ("bsl", true, I8X16),
                 };

                 let show_vreg_fn: fn(Reg, Option<&RealRegUniverse>, Type) -> String = if vector {
@@ -2686,9 +2712,14 @@ impl ShowWithRRU for Inst {
                 let rm = show_vreg_fn(rm, mb_rru, ty);
                 format!("{} {}, {}, {}", op, rd, rn, rm)
             }
-            &Inst::VecMisc { op, rd, rn, ty } => {
-                let op = match op {
-                    VecMisc2::Not => "mvn",
+            &Inst::VecMisc {
+                op,
+                rd,
+                rn,
+                ty: _ty,
+            } => {
+                let (op, ty) = match op {
+                    VecMisc2::Not => ("mvn", I8X16),
                 };

                 let rd = show_vreg_vector(rd.to_reg(), mb_rru, ty);
@@ -386,11 +386,21 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(

         Opcode::Bnot => {
             let rd = output_to_reg(ctx, outputs[0]);
-            let rm = input_to_rs_immlogic(ctx, inputs[0], NarrowValueMode::None);
             let ty = ty.unwrap();
+            if ty_bits(ty) < 128 {
+                let rm = input_to_rs_immlogic(ctx, inputs[0], NarrowValueMode::None);
                 let alu_op = choose_32_64(ty, ALUOp::OrrNot32, ALUOp::OrrNot64);
                 // NOT rd, rm ==> ORR_NOT rd, zero, rm
                 ctx.emit(alu_inst_immlogic(alu_op, rd, zero_reg(), rm));
+            } else {
+                let rm = input_to_reg(ctx, inputs[0], NarrowValueMode::None);
+                ctx.emit(Inst::VecMisc {
+                    op: VecMisc2::Not,
+                    rd,
+                    rn: rm,
+                    ty,
+                });
+            }
         }

         Opcode::Band
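The scalar path still leans on the identity spelled out in the comment above: OR-NOT against the zero register is a plain bitwise NOT, while 128-bit types now take the vector NOT (mvn) route instead. A tiny check of that identity in ordinary Rust (illustrative only, not Cranelift code):

    // orr_not rd, xzr, rm computes 0 | !rm, which is simply !rm; this is the
    // identity behind lowering a scalar bnot to ORR_NOT with the zero register.
    fn orr_not_with_zero(rm: u64) -> u64 {
        0 | !rm
    }

    fn main() {
        let x = 0x00ff_00ff_f0f0_f0f0u64;
        assert_eq!(orr_not_with_zero(x), !x);
    }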
@@ -400,9 +410,10 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
         | Opcode::BorNot
         | Opcode::BxorNot => {
             let rd = output_to_reg(ctx, outputs[0]);
+            let ty = ty.unwrap();
+            if ty_bits(ty) < 128 {
                 let rn = input_to_reg(ctx, inputs[0], NarrowValueMode::None);
                 let rm = input_to_rs_immlogic(ctx, inputs[1], NarrowValueMode::None);
-            let ty = ty.unwrap();
                 let alu_op = match op {
                     Opcode::Band => choose_32_64(ty, ALUOp::And32, ALUOp::And64),
                     Opcode::Bor => choose_32_64(ty, ALUOp::Orr32, ALUOp::Orr64),
@@ -413,6 +424,27 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
                     _ => unreachable!(),
                 };
                 ctx.emit(alu_inst_immlogic(alu_op, rd, rn, rm));
+            } else {
+                let alu_op = match op {
+                    Opcode::Band => VecALUOp::And,
+                    Opcode::BandNot => VecALUOp::Bic,
+                    Opcode::Bor => VecALUOp::Orr,
+                    Opcode::Bxor => VecALUOp::Eor,
+                    _ => unreachable!(),
+                };
+
+                let rn = input_to_reg(ctx, inputs[0], NarrowValueMode::None);
+                let rm = input_to_reg(ctx, inputs[1], NarrowValueMode::None);
+                let rd = output_to_reg(ctx, outputs[0]);
+
+                ctx.emit(Inst::VecRRR {
+                    alu_op,
+                    rd,
+                    rn,
+                    rm,
+                    ty,
+                });
+            }
         }

         Opcode::Ishl | Opcode::Ushr | Opcode::Sshr => {
@@ -1035,6 +1067,8 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
         }

         Opcode::Bitselect => {
+            let ty = ty.unwrap();
+            if ty_bits(ty) < 128 {
                 let tmp = ctx.alloc_tmp(RegClass::I64, I64);
                 let rd = output_to_reg(ctx, outputs[0]);
                 let rcond = input_to_reg(ctx, inputs[0], NarrowValueMode::None);
@@ -1061,6 +1095,21 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
                     rn: rd.to_reg(),
                     rm: tmp.to_reg(),
                 });
+            } else {
+                let rcond = input_to_reg(ctx, inputs[0], NarrowValueMode::None);
+                let rn = input_to_reg(ctx, inputs[1], NarrowValueMode::None);
+                let rm = input_to_reg(ctx, inputs[2], NarrowValueMode::None);
+                let rd = output_to_reg(ctx, outputs[0]);
+                ctx.emit(Inst::gen_move(rd, rcond, ty));
+
+                ctx.emit(Inst::VecRRR {
+                    alu_op: VecALUOp::Bsl,
+                    rd,
+                    rn,
+                    rm,
+                    ty,
+                });
+            }
         }

         Opcode::Trueif => {