Merge pull request #1802 from akirilov-arm/simd_align
Enable the wast::Cranelift::spec::simd::simd_align test for AArch64
This commit is contained in:
2
build.rs
2
build.rs
@@ -180,8 +180,8 @@ fn ignore(testsuite: &str, testname: &str, strategy: &str) -> bool {
|
|||||||
_ => (),
|
_ => (),
|
||||||
},
|
},
|
||||||
"Cranelift" => match (testsuite, testname) {
|
"Cranelift" => match (testsuite, testname) {
|
||||||
("simd", "simd_store") => return false,
|
|
||||||
("simd", "simd_i8x16_cmp") => return false,
|
("simd", "simd_i8x16_cmp") => return false,
|
||||||
|
("simd", "simd_store") => return false,
|
||||||
// Most simd tests are known to fail on aarch64 for now, it's going
|
// Most simd tests are known to fail on aarch64 for now, it's going
|
||||||
// to be a big chunk of work to implement them all there!
|
// to be a big chunk of work to implement them all there!
|
||||||
("simd", _) if target.contains("aarch64") => return true,
|
("simd", _) if target.contains("aarch64") => return true,
|
||||||
|
|||||||
@@ -406,7 +406,7 @@ fn in_int_reg(ty: ir::Type) -> bool {
|
|||||||
|
|
||||||
fn in_vec_reg(ty: ir::Type) -> bool {
|
fn in_vec_reg(ty: ir::Type) -> bool {
|
||||||
match ty {
|
match ty {
|
||||||
types::F32 | types::F64 | types::I8X16 => true,
|
types::F32 | types::F64 | types::I8X16 | types::I16X8 | types::I32X4 | types::I64X2 => true,
|
||||||
_ => false,
|
_ => false,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -1149,6 +1149,23 @@ impl MachInstEmit for Inst {
|
|||||||
| machreg_to_gpr(rd.to_reg()),
|
| machreg_to_gpr(rd.to_reg()),
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
&Inst::VecExtend { t, rd, rn } => {
|
||||||
|
let (u, immh) = match t {
|
||||||
|
VecExtendOp::Sxtl8 => (0b0, 0b001),
|
||||||
|
VecExtendOp::Sxtl16 => (0b0, 0b010),
|
||||||
|
VecExtendOp::Sxtl32 => (0b0, 0b100),
|
||||||
|
VecExtendOp::Uxtl8 => (0b1, 0b001),
|
||||||
|
VecExtendOp::Uxtl16 => (0b1, 0b010),
|
||||||
|
VecExtendOp::Uxtl32 => (0b1, 0b100),
|
||||||
|
};
|
||||||
|
sink.put4(
|
||||||
|
0b000_011110_0000_000_101001_00000_00000
|
||||||
|
| (u << 29)
|
||||||
|
| (immh << 19)
|
||||||
|
| (machreg_to_vec(rn) << 5)
|
||||||
|
| machreg_to_vec(rd.to_reg()),
|
||||||
|
);
|
||||||
|
}
|
||||||
&Inst::VecRRR {
|
&Inst::VecRRR {
|
||||||
rd,
|
rd,
|
||||||
rn,
|
rn,
|
||||||
|
|||||||
@@ -1826,6 +1826,60 @@ fn test_aarch64_binemit() {
|
|||||||
"E5979F9A",
|
"E5979F9A",
|
||||||
"cset x5, hi",
|
"cset x5, hi",
|
||||||
));
|
));
|
||||||
|
insns.push((
|
||||||
|
Inst::VecExtend {
|
||||||
|
t: VecExtendOp::Sxtl8,
|
||||||
|
rd: writable_vreg(4),
|
||||||
|
rn: vreg(27),
|
||||||
|
},
|
||||||
|
"64A7080F",
|
||||||
|
"sxtl v4.8h, v27.8b",
|
||||||
|
));
|
||||||
|
insns.push((
|
||||||
|
Inst::VecExtend {
|
||||||
|
t: VecExtendOp::Sxtl16,
|
||||||
|
rd: writable_vreg(17),
|
||||||
|
rn: vreg(19),
|
||||||
|
},
|
||||||
|
"71A6100F",
|
||||||
|
"sxtl v17.4s, v19.4h",
|
||||||
|
));
|
||||||
|
insns.push((
|
||||||
|
Inst::VecExtend {
|
||||||
|
t: VecExtendOp::Sxtl32,
|
||||||
|
rd: writable_vreg(30),
|
||||||
|
rn: vreg(6),
|
||||||
|
},
|
||||||
|
"DEA4200F",
|
||||||
|
"sxtl v30.2d, v6.2s",
|
||||||
|
));
|
||||||
|
insns.push((
|
||||||
|
Inst::VecExtend {
|
||||||
|
t: VecExtendOp::Uxtl8,
|
||||||
|
rd: writable_vreg(3),
|
||||||
|
rn: vreg(29),
|
||||||
|
},
|
||||||
|
"A3A7082F",
|
||||||
|
"uxtl v3.8h, v29.8b",
|
||||||
|
));
|
||||||
|
insns.push((
|
||||||
|
Inst::VecExtend {
|
||||||
|
t: VecExtendOp::Uxtl16,
|
||||||
|
rd: writable_vreg(15),
|
||||||
|
rn: vreg(12),
|
||||||
|
},
|
||||||
|
"8FA5102F",
|
||||||
|
"uxtl v15.4s, v12.4h",
|
||||||
|
));
|
||||||
|
insns.push((
|
||||||
|
Inst::VecExtend {
|
||||||
|
t: VecExtendOp::Uxtl32,
|
||||||
|
rd: writable_vreg(28),
|
||||||
|
rn: vreg(2),
|
||||||
|
},
|
||||||
|
"5CA4202F",
|
||||||
|
"uxtl v28.2d, v2.2s",
|
||||||
|
));
|
||||||
insns.push((
|
insns.push((
|
||||||
Inst::VecRRR {
|
Inst::VecRRR {
|
||||||
rd: writable_vreg(21),
|
rd: writable_vreg(21),
|
||||||
|
|||||||
@@ -5,7 +5,8 @@
|
|||||||
|
|
||||||
use crate::binemit::CodeOffset;
|
use crate::binemit::CodeOffset;
|
||||||
use crate::ir::types::{
|
use crate::ir::types::{
|
||||||
B1, B16, B32, B64, B8, B8X16, F32, F32X2, F64, FFLAGS, I128, I16, I32, I64, I8, I8X16, IFLAGS,
|
B1, B16, B32, B64, B8, B8X16, F32, F32X2, F64, FFLAGS, I128, I16, I16X4, I16X8, I32, I32X2,
|
||||||
|
I32X4, I64, I64X2, I8, I8X16, I8X8, IFLAGS,
|
||||||
};
|
};
|
||||||
use crate::ir::{ExternalName, Opcode, SourceLoc, TrapCode, Type};
|
use crate::ir::{ExternalName, Opcode, SourceLoc, TrapCode, Type};
|
||||||
use crate::machinst::*;
|
use crate::machinst::*;
|
||||||
@@ -186,6 +187,23 @@ pub enum FpuRoundMode {
|
|||||||
Nearest64,
|
Nearest64,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Type of vector element extensions.
|
||||||
|
#[derive(Copy, Clone, Debug, PartialEq, Eq, Hash)]
|
||||||
|
pub enum VecExtendOp {
|
||||||
|
/// Signed extension of 8-bit elements
|
||||||
|
Sxtl8,
|
||||||
|
/// Signed extension of 16-bit elements
|
||||||
|
Sxtl16,
|
||||||
|
/// Signed extension of 32-bit elements
|
||||||
|
Sxtl32,
|
||||||
|
/// Unsigned extension of 8-bit elements
|
||||||
|
Uxtl8,
|
||||||
|
/// Unsigned extension of 16-bit elements
|
||||||
|
Uxtl16,
|
||||||
|
/// Unsigned extension of 32-bit elements
|
||||||
|
Uxtl32,
|
||||||
|
}
|
||||||
|
|
||||||
/// A vector ALU operation.
|
/// A vector ALU operation.
|
||||||
#[derive(Copy, Clone, Debug, PartialEq, Eq, Hash)]
|
#[derive(Copy, Clone, Debug, PartialEq, Eq, Hash)]
|
||||||
pub enum VecALUOp {
|
pub enum VecALUOp {
|
||||||
@@ -667,6 +685,13 @@ pub enum Inst {
|
|||||||
rn: Reg,
|
rn: Reg,
|
||||||
},
|
},
|
||||||
|
|
||||||
|
/// Vector extend.
|
||||||
|
VecExtend {
|
||||||
|
t: VecExtendOp,
|
||||||
|
rd: Writable<Reg>,
|
||||||
|
rn: Reg,
|
||||||
|
},
|
||||||
|
|
||||||
/// A vector ALU op.
|
/// A vector ALU op.
|
||||||
VecRRR {
|
VecRRR {
|
||||||
alu_op: VecALUOp,
|
alu_op: VecALUOp,
|
||||||
@@ -1208,6 +1233,10 @@ fn aarch64_get_regs(inst: &Inst, collector: &mut RegUsageCollector) {
|
|||||||
collector.add_def(rd);
|
collector.add_def(rd);
|
||||||
collector.add_use(rn);
|
collector.add_use(rn);
|
||||||
}
|
}
|
||||||
|
&Inst::VecExtend { rd, rn, .. } => {
|
||||||
|
collector.add_def(rd);
|
||||||
|
collector.add_use(rn);
|
||||||
|
}
|
||||||
&Inst::VecRRR { rd, rn, rm, .. } => {
|
&Inst::VecRRR { rd, rn, rm, .. } => {
|
||||||
collector.add_def(rd);
|
collector.add_def(rd);
|
||||||
collector.add_use(rn);
|
collector.add_use(rn);
|
||||||
@@ -1752,6 +1781,14 @@ fn aarch64_map_regs<RUM: RegUsageMapper>(inst: &mut Inst, mapper: &RUM) {
|
|||||||
map_def(mapper, rd);
|
map_def(mapper, rd);
|
||||||
map_use(mapper, rn);
|
map_use(mapper, rn);
|
||||||
}
|
}
|
||||||
|
&mut Inst::VecExtend {
|
||||||
|
ref mut rd,
|
||||||
|
ref mut rn,
|
||||||
|
..
|
||||||
|
} => {
|
||||||
|
map_def(mapper, rd);
|
||||||
|
map_use(mapper, rn);
|
||||||
|
}
|
||||||
&mut Inst::VecRRR {
|
&mut Inst::VecRRR {
|
||||||
ref mut rd,
|
ref mut rd,
|
||||||
ref mut rn,
|
ref mut rn,
|
||||||
@@ -1940,7 +1977,7 @@ impl MachInst for Inst {
|
|||||||
I8 | I16 | I32 | I64 | B1 | B8 | B16 | B32 | B64 => Ok(RegClass::I64),
|
I8 | I16 | I32 | I64 | B1 | B8 | B16 | B32 | B64 => Ok(RegClass::I64),
|
||||||
F32 | F64 => Ok(RegClass::V128),
|
F32 | F64 => Ok(RegClass::V128),
|
||||||
IFLAGS | FFLAGS => Ok(RegClass::I64),
|
IFLAGS | FFLAGS => Ok(RegClass::I64),
|
||||||
I8X16 => Ok(RegClass::V128),
|
I8X16 | I16X8 | I32X4 | I64X2 => Ok(RegClass::V128),
|
||||||
B8X16 => Ok(RegClass::V128),
|
B8X16 => Ok(RegClass::V128),
|
||||||
_ => Err(CodegenError::Unsupported(format!(
|
_ => Err(CodegenError::Unsupported(format!(
|
||||||
"Unexpected SSA-value type: {}",
|
"Unexpected SSA-value type: {}",
|
||||||
@@ -2515,6 +2552,19 @@ impl ShowWithRRU for Inst {
|
|||||||
let rn = rn.show_rru(mb_rru);
|
let rn = rn.show_rru(mb_rru);
|
||||||
format!("mov {}, {}.d[0]", rd, rn)
|
format!("mov {}, {}.d[0]", rd, rn)
|
||||||
}
|
}
|
||||||
|
&Inst::VecExtend { t, rd, rn } => {
|
||||||
|
let (op, dest, src) = match t {
|
||||||
|
VecExtendOp::Sxtl8 => ("sxtl", I16X8, I8X8),
|
||||||
|
VecExtendOp::Sxtl16 => ("sxtl", I32X4, I16X4),
|
||||||
|
VecExtendOp::Sxtl32 => ("sxtl", I64X2, I32X2),
|
||||||
|
VecExtendOp::Uxtl8 => ("uxtl", I16X8, I8X8),
|
||||||
|
VecExtendOp::Uxtl16 => ("uxtl", I32X4, I16X4),
|
||||||
|
VecExtendOp::Uxtl32 => ("uxtl", I64X2, I32X2),
|
||||||
|
};
|
||||||
|
let rd = show_vreg_vector(rd.to_reg(), mb_rru, dest);
|
||||||
|
let rn = show_vreg_vector(rn, mb_rru, src);
|
||||||
|
format!("{} {}, {}", op, rd, rn)
|
||||||
|
}
|
||||||
&Inst::VecRRR {
|
&Inst::VecRRR {
|
||||||
rd,
|
rd,
|
||||||
rn,
|
rn,
|
||||||
|
|||||||
@@ -321,6 +321,12 @@ pub fn show_vreg_vector(reg: Reg, mb_rru: Option<&RealRegUniverse>, ty: Type) ->
|
|||||||
match ty {
|
match ty {
|
||||||
I8X16 => s.push_str(".16b"),
|
I8X16 => s.push_str(".16b"),
|
||||||
F32X2 => s.push_str(".2s"),
|
F32X2 => s.push_str(".2s"),
|
||||||
|
I8X8 => s.push_str(".8b"),
|
||||||
|
I16X4 => s.push_str(".4h"),
|
||||||
|
I16X8 => s.push_str(".8h"),
|
||||||
|
I32X2 => s.push_str(".2s"),
|
||||||
|
I32X4 => s.push_str(".4s"),
|
||||||
|
I64X2 => s.push_str(".2d"),
|
||||||
_ => unimplemented!(),
|
_ => unimplemented!(),
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -716,7 +716,8 @@ pub fn ty_bits(ty: Type) -> usize {
|
|||||||
B64 | I64 | F64 => 64,
|
B64 | I64 | F64 => 64,
|
||||||
B128 | I128 => 128,
|
B128 | I128 => 128,
|
||||||
IFLAGS | FFLAGS => 32,
|
IFLAGS | FFLAGS => 32,
|
||||||
I8X16 | B8X16 => 128,
|
I8X8 | I16X4 | I32X2 => 64,
|
||||||
|
B8X16 | I8X16 | I16X8 | I32X4 | I64X2 => 128,
|
||||||
_ => panic!("ty_bits() on unknown type: {:?}", ty),
|
_ => panic!("ty_bits() on unknown type: {:?}", ty),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -724,7 +725,7 @@ pub fn ty_bits(ty: Type) -> usize {
|
|||||||
pub(crate) fn ty_is_int(ty: Type) -> bool {
|
pub(crate) fn ty_is_int(ty: Type) -> bool {
|
||||||
match ty {
|
match ty {
|
||||||
B1 | B8 | I8 | B16 | I16 | B32 | I32 | B64 | I64 => true,
|
B1 | B8 | I8 | B16 | I16 | B32 | I32 | B64 | I64 => true,
|
||||||
F32 | F64 | B128 | I128 | I8X16 => false,
|
F32 | F64 | B128 | I128 | I8X8 | I8X16 | I16X4 | I16X8 | I32X2 | I32X4 | I64X2 => false,
|
||||||
IFLAGS | FFLAGS => panic!("Unexpected flags type"),
|
IFLAGS | FFLAGS => panic!("Unexpected flags type"),
|
||||||
_ => panic!("ty_is_int() on unknown type: {:?}", ty),
|
_ => panic!("ty_is_int() on unknown type: {:?}", ty),
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -829,7 +829,13 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
|
|||||||
| Opcode::Uload16Complex
|
| Opcode::Uload16Complex
|
||||||
| Opcode::Sload16Complex
|
| Opcode::Sload16Complex
|
||||||
| Opcode::Uload32Complex
|
| Opcode::Uload32Complex
|
||||||
| Opcode::Sload32Complex => {
|
| Opcode::Sload32Complex
|
||||||
|
| Opcode::Sload8x8
|
||||||
|
| Opcode::Uload8x8
|
||||||
|
| Opcode::Sload16x4
|
||||||
|
| Opcode::Uload16x4
|
||||||
|
| Opcode::Sload32x2
|
||||||
|
| Opcode::Uload32x2 => {
|
||||||
let off = ldst_offset(ctx.data(insn)).unwrap();
|
let off = ldst_offset(ctx.data(insn)).unwrap();
|
||||||
let elem_ty = match op {
|
let elem_ty = match op {
|
||||||
Opcode::Sload8 | Opcode::Uload8 | Opcode::Sload8Complex | Opcode::Uload8Complex => {
|
Opcode::Sload8 | Opcode::Uload8 | Opcode::Sload8Complex | Opcode::Uload8Complex => {
|
||||||
@@ -844,6 +850,9 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
|
|||||||
| Opcode::Sload32Complex
|
| Opcode::Sload32Complex
|
||||||
| Opcode::Uload32Complex => I32,
|
| Opcode::Uload32Complex => I32,
|
||||||
Opcode::Load | Opcode::LoadComplex => ctx.output_ty(insn, 0),
|
Opcode::Load | Opcode::LoadComplex => ctx.output_ty(insn, 0),
|
||||||
|
Opcode::Sload8x8 | Opcode::Uload8x8 => I8X8,
|
||||||
|
Opcode::Sload16x4 | Opcode::Uload16x4 => I16X4,
|
||||||
|
Opcode::Sload32x2 | Opcode::Uload32x2 => I32X2,
|
||||||
_ => unreachable!(),
|
_ => unreachable!(),
|
||||||
};
|
};
|
||||||
let sign_extend = match op {
|
let sign_extend = match op {
|
||||||
@@ -877,10 +886,30 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
|
|||||||
(32, true, false) => Inst::SLoad32 { rd, mem, srcloc },
|
(32, true, false) => Inst::SLoad32 { rd, mem, srcloc },
|
||||||
(32, _, true) => Inst::FpuLoad32 { rd, mem, srcloc },
|
(32, _, true) => Inst::FpuLoad32 { rd, mem, srcloc },
|
||||||
(64, _, false) => Inst::ULoad64 { rd, mem, srcloc },
|
(64, _, false) => Inst::ULoad64 { rd, mem, srcloc },
|
||||||
|
// Note that we treat some of the vector loads as scalar floating-point loads,
|
||||||
|
// which is correct in a little-endian environment.
|
||||||
(64, _, true) => Inst::FpuLoad64 { rd, mem, srcloc },
|
(64, _, true) => Inst::FpuLoad64 { rd, mem, srcloc },
|
||||||
(128, _, _) => Inst::FpuLoad128 { rd, mem, srcloc },
|
(128, _, _) => Inst::FpuLoad128 { rd, mem, srcloc },
|
||||||
_ => panic!("Unsupported size in load"),
|
_ => panic!("Unsupported size in load"),
|
||||||
});
|
});
|
||||||
|
|
||||||
|
let vec_extend = match op {
|
||||||
|
Opcode::Sload8x8 => Some(VecExtendOp::Sxtl8),
|
||||||
|
Opcode::Uload8x8 => Some(VecExtendOp::Uxtl8),
|
||||||
|
Opcode::Sload16x4 => Some(VecExtendOp::Sxtl16),
|
||||||
|
Opcode::Uload16x4 => Some(VecExtendOp::Uxtl16),
|
||||||
|
Opcode::Sload32x2 => Some(VecExtendOp::Sxtl32),
|
||||||
|
Opcode::Uload32x2 => Some(VecExtendOp::Uxtl32),
|
||||||
|
_ => None,
|
||||||
|
};
|
||||||
|
|
||||||
|
if let Some(t) = vec_extend {
|
||||||
|
ctx.emit(Inst::VecExtend {
|
||||||
|
t,
|
||||||
|
rd,
|
||||||
|
rn: rd.to_reg(),
|
||||||
|
});
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
Opcode::Store
|
Opcode::Store
|
||||||
@@ -1433,17 +1462,11 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
|
|||||||
| Opcode::Extractlane
|
| Opcode::Extractlane
|
||||||
| Opcode::ScalarToVector
|
| Opcode::ScalarToVector
|
||||||
| Opcode::Swizzle
|
| Opcode::Swizzle
|
||||||
| Opcode::Uload8x8
|
|
||||||
| Opcode::Uload8x8Complex
|
| Opcode::Uload8x8Complex
|
||||||
| Opcode::Sload8x8
|
|
||||||
| Opcode::Sload8x8Complex
|
| Opcode::Sload8x8Complex
|
||||||
| Opcode::Uload16x4
|
|
||||||
| Opcode::Uload16x4Complex
|
| Opcode::Uload16x4Complex
|
||||||
| Opcode::Sload16x4
|
|
||||||
| Opcode::Sload16x4Complex
|
| Opcode::Sload16x4Complex
|
||||||
| Opcode::Uload32x2
|
|
||||||
| Opcode::Uload32x2Complex
|
| Opcode::Uload32x2Complex
|
||||||
| Opcode::Sload32x2
|
|
||||||
| Opcode::Sload32x2Complex => {
|
| Opcode::Sload32x2Complex => {
|
||||||
// TODO
|
// TODO
|
||||||
panic!("Vector ops not implemented.");
|
panic!("Vector ops not implemented.");
|
||||||
|
|||||||
Reference in New Issue
Block a user