Implement vector element extensions for AArch64
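This commit also implements the load-and-extend operations (`Sload8x8`, `Uload8x8`, `Sload16x4`, `Uload16x4`, `Sload32x2` and `Uload32x2`), which are lowered to a 64-bit load followed by a vector element extension. Both are prerequisites for enabling further SIMD spec tests. Copyright (c) 2020, Arm Limited.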
This commit is contained in:
@@ -406,7 +406,7 @@ fn in_int_reg(ty: ir::Type) -> bool {
|
||||
|
||||
fn in_vec_reg(ty: ir::Type) -> bool {
|
||||
match ty {
|
||||
types::F32 | types::F64 | types::I8X16 => true,
|
||||
types::F32 | types::F64 | types::I8X16 | types::I16X8 | types::I32X4 | types::I64X2 => true,
|
||||
_ => false,
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1149,6 +1149,23 @@ impl MachInstEmit for Inst {
|
||||
| machreg_to_gpr(rd.to_reg()),
|
||||
);
|
||||
}
|
||||
&Inst::VecExtend { t, rd, rn } => {
|
||||
let (u, immh) = match t {
|
||||
VecExtendOp::Sxtl8 => (0b0, 0b001),
|
||||
VecExtendOp::Sxtl16 => (0b0, 0b010),
|
||||
VecExtendOp::Sxtl32 => (0b0, 0b100),
|
||||
VecExtendOp::Uxtl8 => (0b1, 0b001),
|
||||
VecExtendOp::Uxtl16 => (0b1, 0b010),
|
||||
VecExtendOp::Uxtl32 => (0b1, 0b100),
|
||||
};
|
||||
sink.put4(
|
||||
0b000_011110_0000_000_101001_00000_00000
|
||||
| (u << 29)
|
||||
| (immh << 19)
|
||||
| (machreg_to_vec(rn) << 5)
|
||||
| machreg_to_vec(rd.to_reg()),
|
||||
);
|
||||
}
|
||||
&Inst::VecRRR {
|
||||
rd,
|
||||
rn,
|
||||
|
||||
@@ -1826,6 +1826,60 @@ fn test_aarch64_binemit() {
|
||||
"E5979F9A",
|
||||
"cset x5, hi",
|
||||
));
|
||||
insns.push((
|
||||
Inst::VecExtend {
|
||||
t: VecExtendOp::Sxtl8,
|
||||
rd: writable_vreg(4),
|
||||
rn: vreg(27),
|
||||
},
|
||||
"64A7080F",
|
||||
"sxtl v4.8h, v27.8b",
|
||||
));
|
||||
insns.push((
|
||||
Inst::VecExtend {
|
||||
t: VecExtendOp::Sxtl16,
|
||||
rd: writable_vreg(17),
|
||||
rn: vreg(19),
|
||||
},
|
||||
"71A6100F",
|
||||
"sxtl v17.4s, v19.4h",
|
||||
));
|
||||
insns.push((
|
||||
Inst::VecExtend {
|
||||
t: VecExtendOp::Sxtl32,
|
||||
rd: writable_vreg(30),
|
||||
rn: vreg(6),
|
||||
},
|
||||
"DEA4200F",
|
||||
"sxtl v30.2d, v6.2s",
|
||||
));
|
||||
insns.push((
|
||||
Inst::VecExtend {
|
||||
t: VecExtendOp::Uxtl8,
|
||||
rd: writable_vreg(3),
|
||||
rn: vreg(29),
|
||||
},
|
||||
"A3A7082F",
|
||||
"uxtl v3.8h, v29.8b",
|
||||
));
|
||||
insns.push((
|
||||
Inst::VecExtend {
|
||||
t: VecExtendOp::Uxtl16,
|
||||
rd: writable_vreg(15),
|
||||
rn: vreg(12),
|
||||
},
|
||||
"8FA5102F",
|
||||
"uxtl v15.4s, v12.4h",
|
||||
));
|
||||
insns.push((
|
||||
Inst::VecExtend {
|
||||
t: VecExtendOp::Uxtl32,
|
||||
rd: writable_vreg(28),
|
||||
rn: vreg(2),
|
||||
},
|
||||
"5CA4202F",
|
||||
"uxtl v28.2d, v2.2s",
|
||||
));
|
||||
insns.push((
|
||||
Inst::VecRRR {
|
||||
rd: writable_vreg(21),
|
||||
|
||||
@@ -5,7 +5,8 @@
|
||||
|
||||
use crate::binemit::CodeOffset;
|
||||
use crate::ir::types::{
|
||||
B1, B16, B32, B64, B8, B8X16, F32, F32X2, F64, FFLAGS, I128, I16, I32, I64, I8, I8X16, IFLAGS,
|
||||
B1, B16, B32, B64, B8, B8X16, F32, F32X2, F64, FFLAGS, I128, I16, I16X4, I16X8, I32, I32X2,
|
||||
I32X4, I64, I64X2, I8, I8X16, I8X8, IFLAGS,
|
||||
};
|
||||
use crate::ir::{ExternalName, Opcode, SourceLoc, TrapCode, Type};
|
||||
use crate::machinst::*;
|
||||
@@ -186,6 +187,23 @@ pub enum FpuRoundMode {
|
||||
Nearest64,
|
||||
}
|
||||
|
||||
/// Type of vector element extensions.
///
/// Each variant names the extension applied by `Inst::VecExtend`: the mnemonic
/// (`sxtl` for signed, `uxtl` for unsigned) together with the width of the
/// *source* elements. The destination elements are twice as wide.
|
||||
#[derive(Copy, Clone, Debug, PartialEq, Eq, Hash)]
|
||||
pub enum VecExtendOp {
|
||||
/// Signed extension of 8-bit elements to 16 bits (`sxtl vd.8h, vn.8b`).
|
||||
Sxtl8,
|
||||
/// Signed extension of 16-bit elements to 32 bits (`sxtl vd.4s, vn.4h`).
|
||||
Sxtl16,
|
||||
/// Signed extension of 32-bit elements to 64 bits (`sxtl vd.2d, vn.2s`).
|
||||
Sxtl32,
|
||||
/// Unsigned extension of 8-bit elements to 16 bits (`uxtl vd.8h, vn.8b`).
|
||||
Uxtl8,
|
||||
/// Unsigned extension of 16-bit elements to 32 bits (`uxtl vd.4s, vn.4h`).
|
||||
Uxtl16,
|
||||
/// Unsigned extension of 32-bit elements to 64 bits (`uxtl vd.2d, vn.2s`).
|
||||
Uxtl32,
|
||||
}
|
||||
|
||||
/// A vector ALU operation.
|
||||
#[derive(Copy, Clone, Debug, PartialEq, Eq, Hash)]
|
||||
pub enum VecALUOp {
|
||||
@@ -667,6 +685,13 @@ pub enum Inst {
|
||||
rn: Reg,
|
||||
},
|
||||
|
||||
/// Vector extend.
|
||||
VecExtend {
|
||||
t: VecExtendOp,
|
||||
rd: Writable<Reg>,
|
||||
rn: Reg,
|
||||
},
|
||||
|
||||
/// A vector ALU op.
|
||||
VecRRR {
|
||||
alu_op: VecALUOp,
|
||||
@@ -1208,6 +1233,10 @@ fn aarch64_get_regs(inst: &Inst, collector: &mut RegUsageCollector) {
|
||||
collector.add_def(rd);
|
||||
collector.add_use(rn);
|
||||
}
|
||||
&Inst::VecExtend { rd, rn, .. } => {
|
||||
collector.add_def(rd);
|
||||
collector.add_use(rn);
|
||||
}
|
||||
&Inst::VecRRR { rd, rn, rm, .. } => {
|
||||
collector.add_def(rd);
|
||||
collector.add_use(rn);
|
||||
@@ -1752,6 +1781,14 @@ fn aarch64_map_regs<RUM: RegUsageMapper>(inst: &mut Inst, mapper: &RUM) {
|
||||
map_def(mapper, rd);
|
||||
map_use(mapper, rn);
|
||||
}
|
||||
&mut Inst::VecExtend {
|
||||
ref mut rd,
|
||||
ref mut rn,
|
||||
..
|
||||
} => {
|
||||
map_def(mapper, rd);
|
||||
map_use(mapper, rn);
|
||||
}
|
||||
&mut Inst::VecRRR {
|
||||
ref mut rd,
|
||||
ref mut rn,
|
||||
@@ -1940,7 +1977,7 @@ impl MachInst for Inst {
|
||||
I8 | I16 | I32 | I64 | B1 | B8 | B16 | B32 | B64 => Ok(RegClass::I64),
|
||||
F32 | F64 => Ok(RegClass::V128),
|
||||
IFLAGS | FFLAGS => Ok(RegClass::I64),
|
||||
I8X16 => Ok(RegClass::V128),
|
||||
I8X16 | I16X8 | I32X4 | I64X2 => Ok(RegClass::V128),
|
||||
B8X16 => Ok(RegClass::V128),
|
||||
_ => Err(CodegenError::Unsupported(format!(
|
||||
"Unexpected SSA-value type: {}",
|
||||
@@ -2515,6 +2552,19 @@ impl ShowWithRRU for Inst {
|
||||
let rn = rn.show_rru(mb_rru);
|
||||
format!("mov {}, {}.d[0]", rd, rn)
|
||||
}
|
||||
&Inst::VecExtend { t, rd, rn } => {
|
||||
let (op, dest, src) = match t {
|
||||
VecExtendOp::Sxtl8 => ("sxtl", I16X8, I8X8),
|
||||
VecExtendOp::Sxtl16 => ("sxtl", I32X4, I16X4),
|
||||
VecExtendOp::Sxtl32 => ("sxtl", I64X2, I32X2),
|
||||
VecExtendOp::Uxtl8 => ("uxtl", I16X8, I8X8),
|
||||
VecExtendOp::Uxtl16 => ("uxtl", I32X4, I16X4),
|
||||
VecExtendOp::Uxtl32 => ("uxtl", I64X2, I32X2),
|
||||
};
|
||||
let rd = show_vreg_vector(rd.to_reg(), mb_rru, dest);
|
||||
let rn = show_vreg_vector(rn, mb_rru, src);
|
||||
format!("{} {}, {}", op, rd, rn)
|
||||
}
|
||||
&Inst::VecRRR {
|
||||
rd,
|
||||
rn,
|
||||
|
||||
@@ -321,6 +321,12 @@ pub fn show_vreg_vector(reg: Reg, mb_rru: Option<&RealRegUniverse>, ty: Type) ->
|
||||
match ty {
|
||||
I8X16 => s.push_str(".16b"),
|
||||
F32X2 => s.push_str(".2s"),
|
||||
I8X8 => s.push_str(".8b"),
|
||||
I16X4 => s.push_str(".4h"),
|
||||
I16X8 => s.push_str(".8h"),
|
||||
I32X2 => s.push_str(".2s"),
|
||||
I32X4 => s.push_str(".4s"),
|
||||
I64X2 => s.push_str(".2d"),
|
||||
_ => unimplemented!(),
|
||||
}
|
||||
|
||||
|
||||
@@ -716,7 +716,8 @@ pub fn ty_bits(ty: Type) -> usize {
|
||||
B64 | I64 | F64 => 64,
|
||||
B128 | I128 => 128,
|
||||
IFLAGS | FFLAGS => 32,
|
||||
I8X16 | B8X16 => 128,
|
||||
I8X8 | I16X4 | I32X2 => 64,
|
||||
B8X16 | I8X16 | I16X8 | I32X4 | I64X2 => 128,
|
||||
_ => panic!("ty_bits() on unknown type: {:?}", ty),
|
||||
}
|
||||
}
|
||||
@@ -724,7 +725,7 @@ pub fn ty_bits(ty: Type) -> usize {
|
||||
pub(crate) fn ty_is_int(ty: Type) -> bool {
|
||||
match ty {
|
||||
B1 | B8 | I8 | B16 | I16 | B32 | I32 | B64 | I64 => true,
|
||||
F32 | F64 | B128 | I128 | I8X16 => false,
|
||||
F32 | F64 | B128 | I128 | I8X8 | I8X16 | I16X4 | I16X8 | I32X2 | I32X4 | I64X2 => false,
|
||||
IFLAGS | FFLAGS => panic!("Unexpected flags type"),
|
||||
_ => panic!("ty_is_int() on unknown type: {:?}", ty),
|
||||
}
|
||||
|
||||
@@ -829,7 +829,13 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
|
||||
| Opcode::Uload16Complex
|
||||
| Opcode::Sload16Complex
|
||||
| Opcode::Uload32Complex
|
||||
| Opcode::Sload32Complex => {
|
||||
| Opcode::Sload32Complex
|
||||
| Opcode::Sload8x8
|
||||
| Opcode::Uload8x8
|
||||
| Opcode::Sload16x4
|
||||
| Opcode::Uload16x4
|
||||
| Opcode::Sload32x2
|
||||
| Opcode::Uload32x2 => {
|
||||
let off = ldst_offset(ctx.data(insn)).unwrap();
|
||||
let elem_ty = match op {
|
||||
Opcode::Sload8 | Opcode::Uload8 | Opcode::Sload8Complex | Opcode::Uload8Complex => {
|
||||
@@ -844,6 +850,9 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
|
||||
| Opcode::Sload32Complex
|
||||
| Opcode::Uload32Complex => I32,
|
||||
Opcode::Load | Opcode::LoadComplex => ctx.output_ty(insn, 0),
|
||||
Opcode::Sload8x8 | Opcode::Uload8x8 => I8X8,
|
||||
Opcode::Sload16x4 | Opcode::Uload16x4 => I16X4,
|
||||
Opcode::Sload32x2 | Opcode::Uload32x2 => I32X2,
|
||||
_ => unreachable!(),
|
||||
};
|
||||
let sign_extend = match op {
|
||||
@@ -877,10 +886,30 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
|
||||
(32, true, false) => Inst::SLoad32 { rd, mem, srcloc },
|
||||
(32, _, true) => Inst::FpuLoad32 { rd, mem, srcloc },
|
||||
(64, _, false) => Inst::ULoad64 { rd, mem, srcloc },
|
||||
// Note that we treat some of the vector loads (the 64-bit ones) as scalar floating-point
|
||||
// loads, which is correct in a little-endian environment.
|
||||
(64, _, true) => Inst::FpuLoad64 { rd, mem, srcloc },
|
||||
(128, _, _) => Inst::FpuLoad128 { rd, mem, srcloc },
|
||||
_ => panic!("Unsupported size in load"),
|
||||
});
|
||||
|
||||
let vec_extend = match op {
|
||||
Opcode::Sload8x8 => Some(VecExtendOp::Sxtl8),
|
||||
Opcode::Uload8x8 => Some(VecExtendOp::Uxtl8),
|
||||
Opcode::Sload16x4 => Some(VecExtendOp::Sxtl16),
|
||||
Opcode::Uload16x4 => Some(VecExtendOp::Uxtl16),
|
||||
Opcode::Sload32x2 => Some(VecExtendOp::Sxtl32),
|
||||
Opcode::Uload32x2 => Some(VecExtendOp::Uxtl32),
|
||||
_ => None,
|
||||
};
|
||||
|
||||
if let Some(t) = vec_extend {
|
||||
ctx.emit(Inst::VecExtend {
|
||||
t,
|
||||
rd,
|
||||
rn: rd.to_reg(),
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
Opcode::Store
|
||||
@@ -1433,17 +1462,11 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
|
||||
| Opcode::Extractlane
|
||||
| Opcode::ScalarToVector
|
||||
| Opcode::Swizzle
|
||||
| Opcode::Uload8x8
|
||||
| Opcode::Uload8x8Complex
|
||||
| Opcode::Sload8x8
|
||||
| Opcode::Sload8x8Complex
|
||||
| Opcode::Uload16x4
|
||||
| Opcode::Uload16x4Complex
|
||||
| Opcode::Sload16x4
|
||||
| Opcode::Sload16x4Complex
|
||||
| Opcode::Uload32x2
|
||||
| Opcode::Uload32x2Complex
|
||||
| Opcode::Sload32x2
|
||||
| Opcode::Sload32x2Complex => {
|
||||
// TODO
|
||||
panic!("Vector ops not implemented.");
|
||||
|
||||
Reference in New Issue
Block a user