Implement vector element extensions for AArch64

This commit also implements the load-and-extend operations. Both changes are
prerequisites for enabling further SIMD spec tests.

Copyright (c) 2020, Arm Limited.
This commit is contained in:
Anton Kirilov
2020-05-28 17:19:36 +01:00
parent fb9f39ce17
commit 51a551fb39
8 changed files with 164 additions and 13 deletions

View File

@@ -180,8 +180,8 @@ fn ignore(testsuite: &str, testname: &str, strategy: &str) -> bool {
_ => (), _ => (),
}, },
"Cranelift" => match (testsuite, testname) { "Cranelift" => match (testsuite, testname) {
("simd", "simd_store") => return false,
("simd", "simd_i8x16_cmp") => return false, ("simd", "simd_i8x16_cmp") => return false,
("simd", "simd_store") => return false,
// Most simd tests are known to fail on aarch64 for now, it's going // Most simd tests are known to fail on aarch64 for now, it's going
// to be a big chunk of work to implement them all there! // to be a big chunk of work to implement them all there!
("simd", _) if target.contains("aarch64") => return true, ("simd", _) if target.contains("aarch64") => return true,

View File

@@ -406,7 +406,7 @@ fn in_int_reg(ty: ir::Type) -> bool {
fn in_vec_reg(ty: ir::Type) -> bool { fn in_vec_reg(ty: ir::Type) -> bool {
match ty { match ty {
types::F32 | types::F64 | types::I8X16 => true, types::F32 | types::F64 | types::I8X16 | types::I16X8 | types::I32X4 | types::I64X2 => true,
_ => false, _ => false,
} }
} }

View File

@@ -1149,6 +1149,23 @@ impl MachInstEmit for Inst {
| machreg_to_gpr(rd.to_reg()), | machreg_to_gpr(rd.to_reg()),
); );
} }
&Inst::VecExtend { t, rd, rn } => {
let (u, immh) = match t {
VecExtendOp::Sxtl8 => (0b0, 0b001),
VecExtendOp::Sxtl16 => (0b0, 0b010),
VecExtendOp::Sxtl32 => (0b0, 0b100),
VecExtendOp::Uxtl8 => (0b1, 0b001),
VecExtendOp::Uxtl16 => (0b1, 0b010),
VecExtendOp::Uxtl32 => (0b1, 0b100),
};
sink.put4(
0b000_011110_0000_000_101001_00000_00000
| (u << 29)
| (immh << 19)
| (machreg_to_vec(rn) << 5)
| machreg_to_vec(rd.to_reg()),
);
}
&Inst::VecRRR { &Inst::VecRRR {
rd, rd,
rn, rn,

View File

@@ -1826,6 +1826,60 @@ fn test_aarch64_binemit() {
"E5979F9A", "E5979F9A",
"cset x5, hi", "cset x5, hi",
)); ));
insns.push((
Inst::VecExtend {
t: VecExtendOp::Sxtl8,
rd: writable_vreg(4),
rn: vreg(27),
},
"64A7080F",
"sxtl v4.8h, v27.8b",
));
insns.push((
Inst::VecExtend {
t: VecExtendOp::Sxtl16,
rd: writable_vreg(17),
rn: vreg(19),
},
"71A6100F",
"sxtl v17.4s, v19.4h",
));
insns.push((
Inst::VecExtend {
t: VecExtendOp::Sxtl32,
rd: writable_vreg(30),
rn: vreg(6),
},
"DEA4200F",
"sxtl v30.2d, v6.2s",
));
insns.push((
Inst::VecExtend {
t: VecExtendOp::Uxtl8,
rd: writable_vreg(3),
rn: vreg(29),
},
"A3A7082F",
"uxtl v3.8h, v29.8b",
));
insns.push((
Inst::VecExtend {
t: VecExtendOp::Uxtl16,
rd: writable_vreg(15),
rn: vreg(12),
},
"8FA5102F",
"uxtl v15.4s, v12.4h",
));
insns.push((
Inst::VecExtend {
t: VecExtendOp::Uxtl32,
rd: writable_vreg(28),
rn: vreg(2),
},
"5CA4202F",
"uxtl v28.2d, v2.2s",
));
insns.push(( insns.push((
Inst::VecRRR { Inst::VecRRR {
rd: writable_vreg(21), rd: writable_vreg(21),

View File

@@ -5,7 +5,8 @@
use crate::binemit::CodeOffset; use crate::binemit::CodeOffset;
use crate::ir::types::{ use crate::ir::types::{
B1, B16, B32, B64, B8, B8X16, F32, F32X2, F64, FFLAGS, I128, I16, I32, I64, I8, I8X16, IFLAGS, B1, B16, B32, B64, B8, B8X16, F32, F32X2, F64, FFLAGS, I128, I16, I16X4, I16X8, I32, I32X2,
I32X4, I64, I64X2, I8, I8X16, I8X8, IFLAGS,
}; };
use crate::ir::{ExternalName, Opcode, SourceLoc, TrapCode, Type}; use crate::ir::{ExternalName, Opcode, SourceLoc, TrapCode, Type};
use crate::machinst::*; use crate::machinst::*;
@@ -186,6 +187,23 @@ pub enum FpuRoundMode {
Nearest64, Nearest64,
} }
/// Type of vector element extensions.
///
/// Each variant selects the signedness of the extension and the width of the
/// source elements. The destination elements are twice as wide as the source
/// elements, which is reflected in the operand arrangements used when the
/// instruction is printed (e.g. `sxtl v4.8h, v27.8b` for `Sxtl8`).
#[derive(Copy, Clone, Debug, PartialEq, Eq, Hash)]
pub enum VecExtendOp {
/// Signed extension of 8-bit elements to 16-bit elements (`sxtl vd.8h, vn.8b`)
Sxtl8,
/// Signed extension of 16-bit elements to 32-bit elements (`sxtl vd.4s, vn.4h`)
Sxtl16,
/// Signed extension of 32-bit elements to 64-bit elements (`sxtl vd.2d, vn.2s`)
Sxtl32,
/// Unsigned extension of 8-bit elements to 16-bit elements (`uxtl vd.8h, vn.8b`)
Uxtl8,
/// Unsigned extension of 16-bit elements to 32-bit elements (`uxtl vd.4s, vn.4h`)
Uxtl16,
/// Unsigned extension of 32-bit elements to 64-bit elements (`uxtl vd.2d, vn.2s`)
Uxtl32,
}
/// A vector ALU operation. /// A vector ALU operation.
#[derive(Copy, Clone, Debug, PartialEq, Eq, Hash)] #[derive(Copy, Clone, Debug, PartialEq, Eq, Hash)]
pub enum VecALUOp { pub enum VecALUOp {
@@ -667,6 +685,13 @@ pub enum Inst {
rn: Reg, rn: Reg,
}, },
/// Vector extend.
VecExtend {
t: VecExtendOp,
rd: Writable<Reg>,
rn: Reg,
},
/// A vector ALU op. /// A vector ALU op.
VecRRR { VecRRR {
alu_op: VecALUOp, alu_op: VecALUOp,
@@ -1208,6 +1233,10 @@ fn aarch64_get_regs(inst: &Inst, collector: &mut RegUsageCollector) {
collector.add_def(rd); collector.add_def(rd);
collector.add_use(rn); collector.add_use(rn);
} }
&Inst::VecExtend { rd, rn, .. } => {
collector.add_def(rd);
collector.add_use(rn);
}
&Inst::VecRRR { rd, rn, rm, .. } => { &Inst::VecRRR { rd, rn, rm, .. } => {
collector.add_def(rd); collector.add_def(rd);
collector.add_use(rn); collector.add_use(rn);
@@ -1752,6 +1781,14 @@ fn aarch64_map_regs<RUM: RegUsageMapper>(inst: &mut Inst, mapper: &RUM) {
map_def(mapper, rd); map_def(mapper, rd);
map_use(mapper, rn); map_use(mapper, rn);
} }
&mut Inst::VecExtend {
ref mut rd,
ref mut rn,
..
} => {
map_def(mapper, rd);
map_use(mapper, rn);
}
&mut Inst::VecRRR { &mut Inst::VecRRR {
ref mut rd, ref mut rd,
ref mut rn, ref mut rn,
@@ -1940,7 +1977,7 @@ impl MachInst for Inst {
I8 | I16 | I32 | I64 | B1 | B8 | B16 | B32 | B64 => Ok(RegClass::I64), I8 | I16 | I32 | I64 | B1 | B8 | B16 | B32 | B64 => Ok(RegClass::I64),
F32 | F64 => Ok(RegClass::V128), F32 | F64 => Ok(RegClass::V128),
IFLAGS | FFLAGS => Ok(RegClass::I64), IFLAGS | FFLAGS => Ok(RegClass::I64),
I8X16 => Ok(RegClass::V128), I8X16 | I16X8 | I32X4 | I64X2 => Ok(RegClass::V128),
B8X16 => Ok(RegClass::V128), B8X16 => Ok(RegClass::V128),
_ => Err(CodegenError::Unsupported(format!( _ => Err(CodegenError::Unsupported(format!(
"Unexpected SSA-value type: {}", "Unexpected SSA-value type: {}",
@@ -2515,6 +2552,19 @@ impl ShowWithRRU for Inst {
let rn = rn.show_rru(mb_rru); let rn = rn.show_rru(mb_rru);
format!("mov {}, {}.d[0]", rd, rn) format!("mov {}, {}.d[0]", rd, rn)
} }
&Inst::VecExtend { t, rd, rn } => {
let (op, dest, src) = match t {
VecExtendOp::Sxtl8 => ("sxtl", I16X8, I8X8),
VecExtendOp::Sxtl16 => ("sxtl", I32X4, I16X4),
VecExtendOp::Sxtl32 => ("sxtl", I64X2, I32X2),
VecExtendOp::Uxtl8 => ("uxtl", I16X8, I8X8),
VecExtendOp::Uxtl16 => ("uxtl", I32X4, I16X4),
VecExtendOp::Uxtl32 => ("uxtl", I64X2, I32X2),
};
let rd = show_vreg_vector(rd.to_reg(), mb_rru, dest);
let rn = show_vreg_vector(rn, mb_rru, src);
format!("{} {}, {}", op, rd, rn)
}
&Inst::VecRRR { &Inst::VecRRR {
rd, rd,
rn, rn,

View File

@@ -321,6 +321,12 @@ pub fn show_vreg_vector(reg: Reg, mb_rru: Option<&RealRegUniverse>, ty: Type) ->
match ty { match ty {
I8X16 => s.push_str(".16b"), I8X16 => s.push_str(".16b"),
F32X2 => s.push_str(".2s"), F32X2 => s.push_str(".2s"),
I8X8 => s.push_str(".8b"),
I16X4 => s.push_str(".4h"),
I16X8 => s.push_str(".8h"),
I32X2 => s.push_str(".2s"),
I32X4 => s.push_str(".4s"),
I64X2 => s.push_str(".2d"),
_ => unimplemented!(), _ => unimplemented!(),
} }

View File

@@ -716,7 +716,8 @@ pub fn ty_bits(ty: Type) -> usize {
B64 | I64 | F64 => 64, B64 | I64 | F64 => 64,
B128 | I128 => 128, B128 | I128 => 128,
IFLAGS | FFLAGS => 32, IFLAGS | FFLAGS => 32,
I8X16 | B8X16 => 128, I8X8 | I16X4 | I32X2 => 64,
B8X16 | I8X16 | I16X8 | I32X4 | I64X2 => 128,
_ => panic!("ty_bits() on unknown type: {:?}", ty), _ => panic!("ty_bits() on unknown type: {:?}", ty),
} }
} }
@@ -724,7 +725,7 @@ pub fn ty_bits(ty: Type) -> usize {
pub(crate) fn ty_is_int(ty: Type) -> bool { pub(crate) fn ty_is_int(ty: Type) -> bool {
match ty { match ty {
B1 | B8 | I8 | B16 | I16 | B32 | I32 | B64 | I64 => true, B1 | B8 | I8 | B16 | I16 | B32 | I32 | B64 | I64 => true,
F32 | F64 | B128 | I128 | I8X16 => false, F32 | F64 | B128 | I128 | I8X8 | I8X16 | I16X4 | I16X8 | I32X2 | I32X4 | I64X2 => false,
IFLAGS | FFLAGS => panic!("Unexpected flags type"), IFLAGS | FFLAGS => panic!("Unexpected flags type"),
_ => panic!("ty_is_int() on unknown type: {:?}", ty), _ => panic!("ty_is_int() on unknown type: {:?}", ty),
} }

View File

@@ -829,7 +829,13 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
| Opcode::Uload16Complex | Opcode::Uload16Complex
| Opcode::Sload16Complex | Opcode::Sload16Complex
| Opcode::Uload32Complex | Opcode::Uload32Complex
| Opcode::Sload32Complex => { | Opcode::Sload32Complex
| Opcode::Sload8x8
| Opcode::Uload8x8
| Opcode::Sload16x4
| Opcode::Uload16x4
| Opcode::Sload32x2
| Opcode::Uload32x2 => {
let off = ldst_offset(ctx.data(insn)).unwrap(); let off = ldst_offset(ctx.data(insn)).unwrap();
let elem_ty = match op { let elem_ty = match op {
Opcode::Sload8 | Opcode::Uload8 | Opcode::Sload8Complex | Opcode::Uload8Complex => { Opcode::Sload8 | Opcode::Uload8 | Opcode::Sload8Complex | Opcode::Uload8Complex => {
@@ -844,6 +850,9 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
| Opcode::Sload32Complex | Opcode::Sload32Complex
| Opcode::Uload32Complex => I32, | Opcode::Uload32Complex => I32,
Opcode::Load | Opcode::LoadComplex => ctx.output_ty(insn, 0), Opcode::Load | Opcode::LoadComplex => ctx.output_ty(insn, 0),
Opcode::Sload8x8 | Opcode::Uload8x8 => I8X8,
Opcode::Sload16x4 | Opcode::Uload16x4 => I16X4,
Opcode::Sload32x2 | Opcode::Uload32x2 => I32X2,
_ => unreachable!(), _ => unreachable!(),
}; };
let sign_extend = match op { let sign_extend = match op {
@@ -877,10 +886,30 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
(32, true, false) => Inst::SLoad32 { rd, mem, srcloc }, (32, true, false) => Inst::SLoad32 { rd, mem, srcloc },
(32, _, true) => Inst::FpuLoad32 { rd, mem, srcloc }, (32, _, true) => Inst::FpuLoad32 { rd, mem, srcloc },
(64, _, false) => Inst::ULoad64 { rd, mem, srcloc }, (64, _, false) => Inst::ULoad64 { rd, mem, srcloc },
// Note that we treat some of the vector loads as scalar floating-point loads,
// which is correct in a little endian environment.
(64, _, true) => Inst::FpuLoad64 { rd, mem, srcloc }, (64, _, true) => Inst::FpuLoad64 { rd, mem, srcloc },
(128, _, _) => Inst::FpuLoad128 { rd, mem, srcloc }, (128, _, _) => Inst::FpuLoad128 { rd, mem, srcloc },
_ => panic!("Unsupported size in load"), _ => panic!("Unsupported size in load"),
}); });
let vec_extend = match op {
Opcode::Sload8x8 => Some(VecExtendOp::Sxtl8),
Opcode::Uload8x8 => Some(VecExtendOp::Uxtl8),
Opcode::Sload16x4 => Some(VecExtendOp::Sxtl16),
Opcode::Uload16x4 => Some(VecExtendOp::Uxtl16),
Opcode::Sload32x2 => Some(VecExtendOp::Sxtl32),
Opcode::Uload32x2 => Some(VecExtendOp::Uxtl32),
_ => None,
};
if let Some(t) = vec_extend {
ctx.emit(Inst::VecExtend {
t,
rd,
rn: rd.to_reg(),
});
}
} }
Opcode::Store Opcode::Store
@@ -1433,17 +1462,11 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
| Opcode::Extractlane | Opcode::Extractlane
| Opcode::ScalarToVector | Opcode::ScalarToVector
| Opcode::Swizzle | Opcode::Swizzle
| Opcode::Uload8x8
| Opcode::Uload8x8Complex | Opcode::Uload8x8Complex
| Opcode::Sload8x8
| Opcode::Sload8x8Complex | Opcode::Sload8x8Complex
| Opcode::Uload16x4
| Opcode::Uload16x4Complex | Opcode::Uload16x4Complex
| Opcode::Sload16x4
| Opcode::Sload16x4Complex | Opcode::Sload16x4Complex
| Opcode::Uload32x2
| Opcode::Uload32x2Complex | Opcode::Uload32x2Complex
| Opcode::Sload32x2
| Opcode::Sload32x2Complex => { | Opcode::Sload32x2Complex => {
// TODO // TODO
panic!("Vector ops not implemented."); panic!("Vector ops not implemented.");