Implement vector element extensions for AArch64
This commit also includes the load and extend operations. Both are prerequisites for enabling further SIMD spec tests.

Copyright (c) 2020, Arm Limited.
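For context, the new operations widen each element in the low 64 bits of a vector register: SXTL sign-extends and UXTL zero-extends every lane to twice its width. A minimal Rust sketch of the 8-bit signed case (illustrative only, not code from this patch):

// Illustrative model of `sxtl vd.8h, vn.8b`: each 8-bit lane of the low
// 64 bits is sign-extended to a 16-bit lane of the full 128-bit result.
fn sxtl8(lanes: [i8; 8]) -> [i16; 8] {
    let mut out = [0i16; 8];
    for (dst, src) in out.iter_mut().zip(lanes.iter()) {
        *dst = i16::from(*src);
    }
    out
}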
 build.rs | 2 +-
@@ -180,8 +180,8 @@ fn ignore(testsuite: &str, testname: &str, strategy: &str) -> bool {
             _ => (),
         },
         "Cranelift" => match (testsuite, testname) {
-            ("simd", "simd_store") => return false,
             ("simd", "simd_i8x16_cmp") => return false,
+            ("simd", "simd_store") => return false,
             // Most simd tests are known to fail on aarch64 for now, it's going
             // to be a big chunk of work to implement them all there!
             ("simd", _) if target.contains("aarch64") => return true,
@@ -406,7 +406,7 @@ fn in_int_reg(ty: ir::Type) -> bool {
 
 fn in_vec_reg(ty: ir::Type) -> bool {
     match ty {
-        types::F32 | types::F64 | types::I8X16 => true,
+        types::F32 | types::F64 | types::I8X16 | types::I16X8 | types::I32X4 | types::I64X2 => true,
         _ => false,
     }
 }
@@ -1149,6 +1149,23 @@ impl MachInstEmit for Inst {
                         | machreg_to_gpr(rd.to_reg()),
                 );
             }
+            &Inst::VecExtend { t, rd, rn } => {
+                let (u, immh) = match t {
+                    VecExtendOp::Sxtl8 => (0b0, 0b001),
+                    VecExtendOp::Sxtl16 => (0b0, 0b010),
+                    VecExtendOp::Sxtl32 => (0b0, 0b100),
+                    VecExtendOp::Uxtl8 => (0b1, 0b001),
+                    VecExtendOp::Uxtl16 => (0b1, 0b010),
+                    VecExtendOp::Uxtl32 => (0b1, 0b100),
+                };
+                sink.put4(
+                    0b000_011110_0000_000_101001_00000_00000
+                        | (u << 29)
+                        | (immh << 19)
+                        | (machreg_to_vec(rn) << 5)
+                        | machreg_to_vec(rd.to_reg()),
+                );
+            }
             &Inst::VecRRR {
                 rd,
                 rn,
@@ -1826,6 +1826,60 @@ fn test_aarch64_binemit() {
         "E5979F9A",
         "cset x5, hi",
     ));
+    insns.push((
+        Inst::VecExtend {
+            t: VecExtendOp::Sxtl8,
+            rd: writable_vreg(4),
+            rn: vreg(27),
+        },
+        "64A7080F",
+        "sxtl v4.8h, v27.8b",
+    ));
+    insns.push((
+        Inst::VecExtend {
+            t: VecExtendOp::Sxtl16,
+            rd: writable_vreg(17),
+            rn: vreg(19),
+        },
+        "71A6100F",
+        "sxtl v17.4s, v19.4h",
+    ));
+    insns.push((
+        Inst::VecExtend {
+            t: VecExtendOp::Sxtl32,
+            rd: writable_vreg(30),
+            rn: vreg(6),
+        },
+        "DEA4200F",
+        "sxtl v30.2d, v6.2s",
+    ));
+    insns.push((
+        Inst::VecExtend {
+            t: VecExtendOp::Uxtl8,
+            rd: writable_vreg(3),
+            rn: vreg(29),
+        },
+        "A3A7082F",
+        "uxtl v3.8h, v29.8b",
+    ));
+    insns.push((
+        Inst::VecExtend {
+            t: VecExtendOp::Uxtl16,
+            rd: writable_vreg(15),
+            rn: vreg(12),
+        },
+        "8FA5102F",
+        "uxtl v15.4s, v12.4h",
+    ));
+    insns.push((
+        Inst::VecExtend {
+            t: VecExtendOp::Uxtl32,
+            rd: writable_vreg(28),
+            rn: vreg(2),
+        },
+        "5CA4202F",
+        "uxtl v28.2d, v2.2s",
+    ));
     insns.push((
         Inst::VecRRR {
             rd: writable_vreg(21),
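As a cross-check (illustrative only, not part of the patch), the expected byte strings in the tests above follow from the encoding used in the emitter: the base opcode ORed with the U bit, the immh field, and the two register numbers, serialized little-endian. For the first case, sxtl v4.8h, v27.8b:

// Recomputing the first test vector above from the emitter's constants.
fn main() {
    let base: u32 = 0b000_011110_0000_000_101001_00000_00000;
    let (u, immh) = (0b0u32, 0b001u32); // VecExtendOp::Sxtl8
    let (rn, rd) = (27u32, 4u32); // v27 -> v4
    let insn = base | (u << 29) | (immh << 19) | (rn << 5) | rd;
    assert_eq!(insn, 0x0F08_A764);
    // The test string "64A7080F" is this word in little-endian byte order.
    assert_eq!(insn.to_le_bytes(), [0x64, 0xA7, 0x08, 0x0F]);
}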
@@ -5,7 +5,8 @@
 
 use crate::binemit::CodeOffset;
 use crate::ir::types::{
-    B1, B16, B32, B64, B8, B8X16, F32, F32X2, F64, FFLAGS, I128, I16, I32, I64, I8, I8X16, IFLAGS,
+    B1, B16, B32, B64, B8, B8X16, F32, F32X2, F64, FFLAGS, I128, I16, I16X4, I16X8, I32, I32X2,
+    I32X4, I64, I64X2, I8, I8X16, I8X8, IFLAGS,
 };
 use crate::ir::{ExternalName, Opcode, SourceLoc, TrapCode, Type};
 use crate::machinst::*;
@@ -186,6 +187,23 @@ pub enum FpuRoundMode {
     Nearest64,
 }
 
+/// Type of vector element extensions.
+#[derive(Copy, Clone, Debug, PartialEq, Eq, Hash)]
+pub enum VecExtendOp {
+    /// Signed extension of 8-bit elements
+    Sxtl8,
+    /// Signed extension of 16-bit elements
+    Sxtl16,
+    /// Signed extension of 32-bit elements
+    Sxtl32,
+    /// Unsigned extension of 8-bit elements
+    Uxtl8,
+    /// Unsigned extension of 16-bit elements
+    Uxtl16,
+    /// Unsigned extension of 32-bit elements
+    Uxtl32,
+}
+
 /// A vector ALU operation.
 #[derive(Copy, Clone, Debug, PartialEq, Eq, Hash)]
 pub enum VecALUOp {
@@ -667,6 +685,13 @@ pub enum Inst {
         rn: Reg,
     },
 
+    /// Vector extend.
+    VecExtend {
+        t: VecExtendOp,
+        rd: Writable<Reg>,
+        rn: Reg,
+    },
+
     /// A vector ALU op.
     VecRRR {
         alu_op: VecALUOp,
@@ -1208,6 +1233,10 @@ fn aarch64_get_regs(inst: &Inst, collector: &mut RegUsageCollector) {
             collector.add_def(rd);
             collector.add_use(rn);
         }
+        &Inst::VecExtend { rd, rn, .. } => {
+            collector.add_def(rd);
+            collector.add_use(rn);
+        }
         &Inst::VecRRR { rd, rn, rm, .. } => {
             collector.add_def(rd);
             collector.add_use(rn);
@@ -1752,6 +1781,14 @@ fn aarch64_map_regs<RUM: RegUsageMapper>(inst: &mut Inst, mapper: &RUM) {
            map_def(mapper, rd);
            map_use(mapper, rn);
        }
+        &mut Inst::VecExtend {
+            ref mut rd,
+            ref mut rn,
+            ..
+        } => {
+            map_def(mapper, rd);
+            map_use(mapper, rn);
+        }
         &mut Inst::VecRRR {
             ref mut rd,
             ref mut rn,
@@ -1940,7 +1977,7 @@ impl MachInst for Inst {
             I8 | I16 | I32 | I64 | B1 | B8 | B16 | B32 | B64 => Ok(RegClass::I64),
             F32 | F64 => Ok(RegClass::V128),
             IFLAGS | FFLAGS => Ok(RegClass::I64),
-            I8X16 => Ok(RegClass::V128),
+            I8X16 | I16X8 | I32X4 | I64X2 => Ok(RegClass::V128),
             B8X16 => Ok(RegClass::V128),
             _ => Err(CodegenError::Unsupported(format!(
                 "Unexpected SSA-value type: {}",
@@ -2515,6 +2552,19 @@ impl ShowWithRRU for Inst {
                 let rn = rn.show_rru(mb_rru);
                 format!("mov {}, {}.d[0]", rd, rn)
             }
+            &Inst::VecExtend { t, rd, rn } => {
+                let (op, dest, src) = match t {
+                    VecExtendOp::Sxtl8 => ("sxtl", I16X8, I8X8),
+                    VecExtendOp::Sxtl16 => ("sxtl", I32X4, I16X4),
+                    VecExtendOp::Sxtl32 => ("sxtl", I64X2, I32X2),
+                    VecExtendOp::Uxtl8 => ("uxtl", I16X8, I8X8),
+                    VecExtendOp::Uxtl16 => ("uxtl", I32X4, I16X4),
+                    VecExtendOp::Uxtl32 => ("uxtl", I64X2, I32X2),
+                };
+                let rd = show_vreg_vector(rd.to_reg(), mb_rru, dest);
+                let rn = show_vreg_vector(rn, mb_rru, src);
+                format!("{} {}, {}", op, rd, rn)
+            }
             &Inst::VecRRR {
                 rd,
                 rn,
@@ -321,6 +321,12 @@ pub fn show_vreg_vector(reg: Reg, mb_rru: Option<&RealRegUniverse>, ty: Type) ->
     match ty {
         I8X16 => s.push_str(".16b"),
         F32X2 => s.push_str(".2s"),
+        I8X8 => s.push_str(".8b"),
+        I16X4 => s.push_str(".4h"),
+        I16X8 => s.push_str(".8h"),
+        I32X2 => s.push_str(".2s"),
+        I32X4 => s.push_str(".4s"),
+        I64X2 => s.push_str(".2d"),
         _ => unimplemented!(),
     }
 
@@ -716,7 +716,8 @@ pub fn ty_bits(ty: Type) -> usize {
         B64 | I64 | F64 => 64,
         B128 | I128 => 128,
         IFLAGS | FFLAGS => 32,
-        I8X16 | B8X16 => 128,
+        I8X8 | I16X4 | I32X2 => 64,
+        B8X16 | I8X16 | I16X8 | I32X4 | I64X2 => 128,
         _ => panic!("ty_bits() on unknown type: {:?}", ty),
     }
 }
@@ -724,7 +725,7 @@ pub fn ty_bits(ty: Type) -> usize {
 pub(crate) fn ty_is_int(ty: Type) -> bool {
     match ty {
         B1 | B8 | I8 | B16 | I16 | B32 | I32 | B64 | I64 => true,
-        F32 | F64 | B128 | I128 | I8X16 => false,
+        F32 | F64 | B128 | I128 | I8X8 | I8X16 | I16X4 | I16X8 | I32X2 | I32X4 | I64X2 => false,
         IFLAGS | FFLAGS => panic!("Unexpected flags type"),
         _ => panic!("ty_is_int() on unknown type: {:?}", ty),
     }
@@ -829,7 +829,13 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
         | Opcode::Uload16Complex
         | Opcode::Sload16Complex
         | Opcode::Uload32Complex
-        | Opcode::Sload32Complex => {
+        | Opcode::Sload32Complex
+        | Opcode::Sload8x8
+        | Opcode::Uload8x8
+        | Opcode::Sload16x4
+        | Opcode::Uload16x4
+        | Opcode::Sload32x2
+        | Opcode::Uload32x2 => {
             let off = ldst_offset(ctx.data(insn)).unwrap();
             let elem_ty = match op {
                 Opcode::Sload8 | Opcode::Uload8 | Opcode::Sload8Complex | Opcode::Uload8Complex => {
@@ -844,6 +850,9 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
                 | Opcode::Sload32Complex
                 | Opcode::Uload32Complex => I32,
                 Opcode::Load | Opcode::LoadComplex => ctx.output_ty(insn, 0),
+                Opcode::Sload8x8 | Opcode::Uload8x8 => I8X8,
+                Opcode::Sload16x4 | Opcode::Uload16x4 => I16X4,
+                Opcode::Sload32x2 | Opcode::Uload32x2 => I32X2,
                 _ => unreachable!(),
             };
             let sign_extend = match op {
@@ -877,10 +886,30 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
                 (32, true, false) => Inst::SLoad32 { rd, mem, srcloc },
                 (32, _, true) => Inst::FpuLoad32 { rd, mem, srcloc },
                 (64, _, false) => Inst::ULoad64 { rd, mem, srcloc },
+                // Note that we treat some of the vector loads as scalar floating-point loads,
+                // which is correct in a little endian environment.
                 (64, _, true) => Inst::FpuLoad64 { rd, mem, srcloc },
                 (128, _, _) => Inst::FpuLoad128 { rd, mem, srcloc },
                 _ => panic!("Unsupported size in load"),
             });
+
+            let vec_extend = match op {
+                Opcode::Sload8x8 => Some(VecExtendOp::Sxtl8),
+                Opcode::Uload8x8 => Some(VecExtendOp::Uxtl8),
+                Opcode::Sload16x4 => Some(VecExtendOp::Sxtl16),
+                Opcode::Uload16x4 => Some(VecExtendOp::Uxtl16),
+                Opcode::Sload32x2 => Some(VecExtendOp::Sxtl32),
+                Opcode::Uload32x2 => Some(VecExtendOp::Uxtl32),
+                _ => None,
+            };
+
+            if let Some(t) = vec_extend {
+                ctx.emit(Inst::VecExtend {
+                    t,
+                    rd,
+                    rn: rd.to_reg(),
+                });
+            }
         }
 
         Opcode::Store
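The lowering above reuses the scalar 64-bit floating-point load for the new 64-bit vector loads and then widens the destination register in place (hence rn: rd.to_reg()). The in-line comment about little-endian environments can be checked with a small sketch (illustrative only, not code from the patch):

// On a little-endian target, loading 8 bytes as one 64-bit scalar and then
// viewing that scalar as eight 8-bit lanes reproduces the original bytes,
// so FpuLoad64 followed by a VecExtend of the same register is a valid
// lowering for the 8x8 load-and-extend operations.
fn main() {
    let memory: [u8; 8] = [1, 2, 3, 4, 5, 6, 7, 250];
    let scalar = u64::from_le_bytes(memory); // what the 64-bit load produces
    let lanes: [u8; 8] = scalar.to_le_bytes(); // the low 64 bits viewed as lanes
    assert_eq!(lanes, memory);
    // The extend step (e.g. Uxtl8) then widens each lane independently.
    let widened: [u16; 8] = lanes.map(u16::from);
    assert_eq!(widened[7], 250);
}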
@@ -1433,17 +1462,11 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
         | Opcode::Extractlane
         | Opcode::ScalarToVector
         | Opcode::Swizzle
-        | Opcode::Uload8x8
         | Opcode::Uload8x8Complex
-        | Opcode::Sload8x8
         | Opcode::Sload8x8Complex
-        | Opcode::Uload16x4
         | Opcode::Uload16x4Complex
-        | Opcode::Sload16x4
         | Opcode::Sload16x4Complex
-        | Opcode::Uload32x2
         | Opcode::Uload32x2Complex
-        | Opcode::Sload32x2
         | Opcode::Sload32x2Complex => {
             // TODO
             panic!("Vector ops not implemented.");