Implement vector element extensions for AArch64

This commit also includes load and extend operations. Both are
prerequisites for enabling further SIMD spec tests.

Copyright (c) 2020, Arm Limited.
This commit is contained in:
Anton Kirilov
2020-05-28 17:19:36 +01:00
parent fb9f39ce17
commit 51a551fb39
8 changed files with 164 additions and 13 deletions

View File

@@ -406,7 +406,7 @@ fn in_int_reg(ty: ir::Type) -> bool {
fn in_vec_reg(ty: ir::Type) -> bool {
match ty {
types::F32 | types::F64 | types::I8X16 => true,
types::F32 | types::F64 | types::I8X16 | types::I16X8 | types::I32X4 | types::I64X2 => true,
_ => false,
}
}

View File

@@ -1149,6 +1149,23 @@ impl MachInstEmit for Inst {
| machreg_to_gpr(rd.to_reg()),
);
}
&Inst::VecExtend { t, rd, rn } => {
let (u, immh) = match t {
VecExtendOp::Sxtl8 => (0b0, 0b001),
VecExtendOp::Sxtl16 => (0b0, 0b010),
VecExtendOp::Sxtl32 => (0b0, 0b100),
VecExtendOp::Uxtl8 => (0b1, 0b001),
VecExtendOp::Uxtl16 => (0b1, 0b010),
VecExtendOp::Uxtl32 => (0b1, 0b100),
};
sink.put4(
0b000_011110_0000_000_101001_00000_00000
| (u << 29)
| (immh << 19)
| (machreg_to_vec(rn) << 5)
| machreg_to_vec(rd.to_reg()),
);
}
&Inst::VecRRR {
rd,
rn,

View File

@@ -1826,6 +1826,60 @@ fn test_aarch64_binemit() {
"E5979F9A",
"cset x5, hi",
));
insns.push((
Inst::VecExtend {
t: VecExtendOp::Sxtl8,
rd: writable_vreg(4),
rn: vreg(27),
},
"64A7080F",
"sxtl v4.8h, v27.8b",
));
insns.push((
Inst::VecExtend {
t: VecExtendOp::Sxtl16,
rd: writable_vreg(17),
rn: vreg(19),
},
"71A6100F",
"sxtl v17.4s, v19.4h",
));
insns.push((
Inst::VecExtend {
t: VecExtendOp::Sxtl32,
rd: writable_vreg(30),
rn: vreg(6),
},
"DEA4200F",
"sxtl v30.2d, v6.2s",
));
insns.push((
Inst::VecExtend {
t: VecExtendOp::Uxtl8,
rd: writable_vreg(3),
rn: vreg(29),
},
"A3A7082F",
"uxtl v3.8h, v29.8b",
));
insns.push((
Inst::VecExtend {
t: VecExtendOp::Uxtl16,
rd: writable_vreg(15),
rn: vreg(12),
},
"8FA5102F",
"uxtl v15.4s, v12.4h",
));
insns.push((
Inst::VecExtend {
t: VecExtendOp::Uxtl32,
rd: writable_vreg(28),
rn: vreg(2),
},
"5CA4202F",
"uxtl v28.2d, v2.2s",
));
insns.push((
Inst::VecRRR {
rd: writable_vreg(21),

View File

@@ -5,7 +5,8 @@
use crate::binemit::CodeOffset;
use crate::ir::types::{
B1, B16, B32, B64, B8, B8X16, F32, F32X2, F64, FFLAGS, I128, I16, I32, I64, I8, I8X16, IFLAGS,
B1, B16, B32, B64, B8, B8X16, F32, F32X2, F64, FFLAGS, I128, I16, I16X4, I16X8, I32, I32X2,
I32X4, I64, I64X2, I8, I8X16, I8X8, IFLAGS,
};
use crate::ir::{ExternalName, Opcode, SourceLoc, TrapCode, Type};
use crate::machinst::*;
@@ -186,6 +187,23 @@ pub enum FpuRoundMode {
Nearest64,
}
/// Type of vector element extensions.
#[derive(Copy, Clone, Debug, PartialEq, Eq, Hash)]
pub enum VecExtendOp {
/// Signed extension of 8-bit elements
Sxtl8,
/// Signed extension of 16-bit elements
Sxtl16,
/// Signed extension of 32-bit elements
Sxtl32,
/// Unsigned extension of 8-bit elements
Uxtl8,
/// Unsigned extension of 16-bit elements
Uxtl16,
/// Unsigned extension of 32-bit elements
Uxtl32,
}
/// A vector ALU operation.
#[derive(Copy, Clone, Debug, PartialEq, Eq, Hash)]
pub enum VecALUOp {
@@ -667,6 +685,13 @@ pub enum Inst {
rn: Reg,
},
/// Vector extend.
VecExtend {
t: VecExtendOp,
rd: Writable<Reg>,
rn: Reg,
},
/// A vector ALU op.
VecRRR {
alu_op: VecALUOp,
@@ -1208,6 +1233,10 @@ fn aarch64_get_regs(inst: &Inst, collector: &mut RegUsageCollector) {
collector.add_def(rd);
collector.add_use(rn);
}
&Inst::VecExtend { rd, rn, .. } => {
collector.add_def(rd);
collector.add_use(rn);
}
&Inst::VecRRR { rd, rn, rm, .. } => {
collector.add_def(rd);
collector.add_use(rn);
@@ -1752,6 +1781,14 @@ fn aarch64_map_regs<RUM: RegUsageMapper>(inst: &mut Inst, mapper: &RUM) {
map_def(mapper, rd);
map_use(mapper, rn);
}
&mut Inst::VecExtend {
ref mut rd,
ref mut rn,
..
} => {
map_def(mapper, rd);
map_use(mapper, rn);
}
&mut Inst::VecRRR {
ref mut rd,
ref mut rn,
@@ -1940,7 +1977,7 @@ impl MachInst for Inst {
I8 | I16 | I32 | I64 | B1 | B8 | B16 | B32 | B64 => Ok(RegClass::I64),
F32 | F64 => Ok(RegClass::V128),
IFLAGS | FFLAGS => Ok(RegClass::I64),
I8X16 => Ok(RegClass::V128),
I8X16 | I16X8 | I32X4 | I64X2 => Ok(RegClass::V128),
B8X16 => Ok(RegClass::V128),
_ => Err(CodegenError::Unsupported(format!(
"Unexpected SSA-value type: {}",
@@ -2515,6 +2552,19 @@ impl ShowWithRRU for Inst {
let rn = rn.show_rru(mb_rru);
format!("mov {}, {}.d[0]", rd, rn)
}
&Inst::VecExtend { t, rd, rn } => {
let (op, dest, src) = match t {
VecExtendOp::Sxtl8 => ("sxtl", I16X8, I8X8),
VecExtendOp::Sxtl16 => ("sxtl", I32X4, I16X4),
VecExtendOp::Sxtl32 => ("sxtl", I64X2, I32X2),
VecExtendOp::Uxtl8 => ("uxtl", I16X8, I8X8),
VecExtendOp::Uxtl16 => ("uxtl", I32X4, I16X4),
VecExtendOp::Uxtl32 => ("uxtl", I64X2, I32X2),
};
let rd = show_vreg_vector(rd.to_reg(), mb_rru, dest);
let rn = show_vreg_vector(rn, mb_rru, src);
format!("{} {}, {}", op, rd, rn)
}
&Inst::VecRRR {
rd,
rn,

View File

@@ -321,6 +321,12 @@ pub fn show_vreg_vector(reg: Reg, mb_rru: Option<&RealRegUniverse>, ty: Type) ->
match ty {
I8X16 => s.push_str(".16b"),
F32X2 => s.push_str(".2s"),
I8X8 => s.push_str(".8b"),
I16X4 => s.push_str(".4h"),
I16X8 => s.push_str(".8h"),
I32X2 => s.push_str(".2s"),
I32X4 => s.push_str(".4s"),
I64X2 => s.push_str(".2d"),
_ => unimplemented!(),
}

View File

@@ -716,7 +716,8 @@ pub fn ty_bits(ty: Type) -> usize {
B64 | I64 | F64 => 64,
B128 | I128 => 128,
IFLAGS | FFLAGS => 32,
I8X16 | B8X16 => 128,
I8X8 | I16X4 | I32X2 => 64,
B8X16 | I8X16 | I16X8 | I32X4 | I64X2 => 128,
_ => panic!("ty_bits() on unknown type: {:?}", ty),
}
}
@@ -724,7 +725,7 @@ pub fn ty_bits(ty: Type) -> usize {
pub(crate) fn ty_is_int(ty: Type) -> bool {
match ty {
B1 | B8 | I8 | B16 | I16 | B32 | I32 | B64 | I64 => true,
F32 | F64 | B128 | I128 | I8X16 => false,
F32 | F64 | B128 | I128 | I8X8 | I8X16 | I16X4 | I16X8 | I32X2 | I32X4 | I64X2 => false,
IFLAGS | FFLAGS => panic!("Unexpected flags type"),
_ => panic!("ty_is_int() on unknown type: {:?}", ty),
}

View File

@@ -829,7 +829,13 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
| Opcode::Uload16Complex
| Opcode::Sload16Complex
| Opcode::Uload32Complex
| Opcode::Sload32Complex => {
| Opcode::Sload32Complex
| Opcode::Sload8x8
| Opcode::Uload8x8
| Opcode::Sload16x4
| Opcode::Uload16x4
| Opcode::Sload32x2
| Opcode::Uload32x2 => {
let off = ldst_offset(ctx.data(insn)).unwrap();
let elem_ty = match op {
Opcode::Sload8 | Opcode::Uload8 | Opcode::Sload8Complex | Opcode::Uload8Complex => {
@@ -844,6 +850,9 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
| Opcode::Sload32Complex
| Opcode::Uload32Complex => I32,
Opcode::Load | Opcode::LoadComplex => ctx.output_ty(insn, 0),
Opcode::Sload8x8 | Opcode::Uload8x8 => I8X8,
Opcode::Sload16x4 | Opcode::Uload16x4 => I16X4,
Opcode::Sload32x2 | Opcode::Uload32x2 => I32X2,
_ => unreachable!(),
};
let sign_extend = match op {
@@ -877,10 +886,30 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
(32, true, false) => Inst::SLoad32 { rd, mem, srcloc },
(32, _, true) => Inst::FpuLoad32 { rd, mem, srcloc },
(64, _, false) => Inst::ULoad64 { rd, mem, srcloc },
// Note that we treat some of the vector loads as scalar floating-point loads,
// which is correct in a little endian environment.
(64, _, true) => Inst::FpuLoad64 { rd, mem, srcloc },
(128, _, _) => Inst::FpuLoad128 { rd, mem, srcloc },
_ => panic!("Unsupported size in load"),
});
let vec_extend = match op {
Opcode::Sload8x8 => Some(VecExtendOp::Sxtl8),
Opcode::Uload8x8 => Some(VecExtendOp::Uxtl8),
Opcode::Sload16x4 => Some(VecExtendOp::Sxtl16),
Opcode::Uload16x4 => Some(VecExtendOp::Uxtl16),
Opcode::Sload32x2 => Some(VecExtendOp::Sxtl32),
Opcode::Uload32x2 => Some(VecExtendOp::Uxtl32),
_ => None,
};
if let Some(t) = vec_extend {
ctx.emit(Inst::VecExtend {
t,
rd,
rn: rd.to_reg(),
});
}
}
Opcode::Store
@@ -1433,17 +1462,11 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
| Opcode::Extractlane
| Opcode::ScalarToVector
| Opcode::Swizzle
| Opcode::Uload8x8
| Opcode::Uload8x8Complex
| Opcode::Sload8x8
| Opcode::Sload8x8Complex
| Opcode::Uload16x4
| Opcode::Uload16x4Complex
| Opcode::Sload16x4
| Opcode::Sload16x4Complex
| Opcode::Uload32x2
| Opcode::Uload32x2Complex
| Opcode::Sload32x2
| Opcode::Sload32x2Complex => {
// TODO
panic!("Vector ops not implemented.");