From d941034c2ec4600f800a31b29db40709b2a4b6bf Mon Sep 17 00:00:00 2001 From: Anton Kirilov Date: Sat, 6 Jun 2020 01:39:57 +0100 Subject: [PATCH] Enable the wast::Cranelift::spec::simd::simd_load_splat test for AArch64 Copyright (c) 2020, Arm Limited. --- build.rs | 5 +- .../codegen/src/isa/aarch64/inst/emit.rs | 28 ++++++++ .../src/isa/aarch64/inst/emit_tests.rs | 54 ++++++++++++++++ cranelift/codegen/src/isa/aarch64/inst/mod.rs | 64 ++++++++++++++++++- .../codegen/src/isa/aarch64/inst/regs.rs | 8 ++- .../codegen/src/isa/aarch64/lower_inst.rs | 13 +++- 6 files changed, 164 insertions(+), 8 deletions(-) diff --git a/build.rs b/build.rs index 59db9b2766..4c363c5c8c 100644 --- a/build.rs +++ b/build.rs @@ -181,10 +181,11 @@ fn ignore(testsuite: &str, testname: &str, strategy: &str) -> bool { }, "Cranelift" => match (testsuite, testname) { ("simd", "simd_i8x16_cmp") => return false, - ("simd", "simd_load_extend") => return false, - ("simd", "simd_store") => return false, ("simd", "simd_i16x8_cmp") => return false, ("simd", "simd_i32x4_cmp") => return false, + ("simd", "simd_load_extend") => return false, + ("simd", "simd_load_splat") => return false, + ("simd", "simd_store") => return false, // Most simd tests are known to fail on aarch64 for now, it's going // to be a big chunk of work to implement them all there! ("simd", _) if target.contains("aarch64") => return true, diff --git a/cranelift/codegen/src/isa/aarch64/inst/emit.rs b/cranelift/codegen/src/isa/aarch64/inst/emit.rs index 7d59dcfdc7..e59eab6306 100644 --- a/cranelift/codegen/src/isa/aarch64/inst/emit.rs +++ b/cranelift/codegen/src/isa/aarch64/inst/emit.rs @@ -1175,6 +1175,34 @@ impl MachInstEmit for Inst { | machreg_to_gpr(rd.to_reg()), ); } + &Inst::VecDup { rd, rn, ty } => { + let imm5 = match ty { + I8 => 0b00001, + I16 => 0b00010, + I32 => 0b00100, + I64 => 0b01000, + _ => unimplemented!(), + }; + sink.put4( + 0b010_01110000_00000_000011_00000_00000 + | (imm5 << 16) + | (machreg_to_gpr(rn) << 5) + | machreg_to_vec(rd.to_reg()), + ); + } + &Inst::VecDupFromFpu { rd, rn, ty } => { + let imm5 = match ty { + F32 => 0b00100, + F64 => 0b01000, + _ => unimplemented!(), + }; + sink.put4( + 0b010_01110000_00000_000001_00000_00000 + | (imm5 << 16) + | (machreg_to_vec(rn) << 5) + | machreg_to_vec(rd.to_reg()), + ); + } &Inst::VecExtend { t, rd, rn } => { let (u, immh) = match t { VecExtendOp::Sxtl8 => (0b0, 0b001), diff --git a/cranelift/codegen/src/isa/aarch64/inst/emit_tests.rs b/cranelift/codegen/src/isa/aarch64/inst/emit_tests.rs index d77d6c414f..01bb2c38e1 100644 --- a/cranelift/codegen/src/isa/aarch64/inst/emit_tests.rs +++ b/cranelift/codegen/src/isa/aarch64/inst/emit_tests.rs @@ -1858,6 +1858,60 @@ fn test_aarch64_binemit() { "E5979F9A", "cset x5, hi", )); + insns.push(( + Inst::VecDup { + rd: writable_vreg(25), + rn: xreg(7), + ty: I8, + }, + "F90C014E", + "dup v25.16b, w7", + )); + insns.push(( + Inst::VecDup { + rd: writable_vreg(2), + rn: xreg(23), + ty: I16, + }, + "E20E024E", + "dup v2.8h, w23", + )); + insns.push(( + Inst::VecDup { + rd: writable_vreg(0), + rn: xreg(28), + ty: I32, + }, + "800F044E", + "dup v0.4s, w28", + )); + insns.push(( + Inst::VecDup { + rd: writable_vreg(31), + rn: xreg(5), + ty: I64, + }, + "BF0C084E", + "dup v31.2d, x5", + )); + insns.push(( + Inst::VecDupFromFpu { + rd: writable_vreg(14), + rn: vreg(19), + ty: F32, + }, + "6E06044E", + "dup v14.4s, v19.s[0]", + )); + insns.push(( + Inst::VecDupFromFpu { + rd: writable_vreg(18), + rn: vreg(10), + ty: F64, + }, + "5205084E", + "dup v18.2d, v10.d[0]", + )); insns.push(( Inst::VecExtend { t: VecExtendOp::Sxtl8, diff --git a/cranelift/codegen/src/isa/aarch64/inst/mod.rs b/cranelift/codegen/src/isa/aarch64/inst/mod.rs index efe3fef9e4..1486490c55 100644 --- a/cranelift/codegen/src/isa/aarch64/inst/mod.rs +++ b/cranelift/codegen/src/isa/aarch64/inst/mod.rs @@ -5,8 +5,8 @@ use crate::binemit::CodeOffset; use crate::ir::types::{ - B1, B16, B16X8, B32, B32X4, B64, B64X2, B8, B8X16, F32, F32X2, F64, FFLAGS, I128, I16, I16X4, - I16X8, I32, I32X2, I32X4, I64, I64X2, I8, I8X16, I8X8, IFLAGS, + B1, B16, B16X8, B32, B32X4, B64, B64X2, B8, B8X16, F32, F32X2, F32X4, F64, F64X2, FFLAGS, I128, + I16, I16X4, I16X8, I32, I32X2, I32X4, I64, I64X2, I8, I8X16, I8X8, IFLAGS, }; use crate::ir::{ExternalName, Opcode, SourceLoc, TrapCode, Type}; use crate::machinst::*; @@ -695,6 +695,20 @@ pub enum Inst { ty: Type, }, + /// Duplicate general-purpose register to vector. + VecDup { + rd: Writable, + rn: Reg, + ty: Type, + }, + + /// Duplicate scalar to vector. + VecDupFromFpu { + rd: Writable, + rn: Reg, + ty: Type, + }, + /// Vector extend. VecExtend { t: VecExtendOp, @@ -1247,6 +1261,14 @@ fn aarch64_get_regs(inst: &Inst, collector: &mut RegUsageCollector) { collector.add_def(rd); collector.add_use(rn); } + &Inst::VecDup { rd, rn, .. } => { + collector.add_def(rd); + collector.add_use(rn); + } + &Inst::VecDupFromFpu { rd, rn, .. } => { + collector.add_def(rd); + collector.add_use(rn); + } &Inst::VecExtend { rd, rn, .. } => { collector.add_def(rd); collector.add_use(rn); @@ -1804,6 +1826,22 @@ fn aarch64_map_regs(inst: &mut Inst, mapper: &RUM) { map_def(mapper, rd); map_use(mapper, rn); } + &mut Inst::VecDup { + ref mut rd, + ref mut rn, + .. + } => { + map_def(mapper, rd); + map_use(mapper, rn); + } + &mut Inst::VecDupFromFpu { + ref mut rd, + ref mut rn, + .. + } => { + map_def(mapper, rd); + map_use(mapper, rn); + } &mut Inst::VecExtend { ref mut rd, ref mut rn, @@ -2583,6 +2621,28 @@ impl ShowWithRRU for Inst { let rn = show_vreg_element(rn, mb_rru, idx, ty); format!("{} {}, {}", op, rd, rn) } + &Inst::VecDup { rd, rn, ty } => { + let vector_type = match ty { + I8 => I8X16, + I16 => I16X8, + I32 => I32X4, + I64 => I64X2, + _ => unimplemented!(), + }; + let rd = show_vreg_vector(rd.to_reg(), mb_rru, vector_type); + let rn = show_ireg_sized(rn, mb_rru, InstSize::from_ty(ty)); + format!("dup {}, {}", rd, rn) + } + &Inst::VecDupFromFpu { rd, rn, ty } => { + let vector_type = match ty { + F32 => F32X4, + F64 => F64X2, + _ => unimplemented!(), + }; + let rd = show_vreg_vector(rd.to_reg(), mb_rru, vector_type); + let rn = show_vreg_element(rn, mb_rru, 0, ty); + format!("dup {}, {}", rd, rn) + } &Inst::VecExtend { t, rd, rn } => { let (op, dest, src) = match t { VecExtendOp::Sxtl8 => ("sxtl", I16X8, I8X8), diff --git a/cranelift/codegen/src/isa/aarch64/inst/regs.rs b/cranelift/codegen/src/isa/aarch64/inst/regs.rs index 6cd47e5884..9d74661256 100644 --- a/cranelift/codegen/src/isa/aarch64/inst/regs.rs +++ b/cranelift/codegen/src/isa/aarch64/inst/regs.rs @@ -319,13 +319,15 @@ pub fn show_vreg_vector(reg: Reg, mb_rru: Option<&RealRegUniverse>, ty: Type) -> let mut s = reg.show_rru(mb_rru); match ty { - I8X16 => s.push_str(".16b"), - I16X8 => s.push_str(".8h"), - I32X4 => s.push_str(".4s"), F32X2 => s.push_str(".2s"), + F32X4 => s.push_str(".4s"), + F64X2 => s.push_str(".2d"), I8X8 => s.push_str(".8b"), + I8X16 => s.push_str(".16b"), I16X4 => s.push_str(".4h"), + I16X8 => s.push_str(".8h"), I32X2 => s.push_str(".2s"), + I32X4 => s.push_str(".4s"), I64X2 => s.push_str(".2d"), _ => unimplemented!(), } diff --git a/cranelift/codegen/src/isa/aarch64/lower_inst.rs b/cranelift/codegen/src/isa/aarch64/lower_inst.rs index ee5c0a9993..391773c472 100644 --- a/cranelift/codegen/src/isa/aarch64/lower_inst.rs +++ b/cranelift/codegen/src/isa/aarch64/lower_inst.rs @@ -1483,13 +1483,24 @@ pub(crate) fn lower_insn_to_regs>( } } + Opcode::Splat => { + let rn = input_to_reg(ctx, inputs[0], NarrowValueMode::None); + let rd = output_to_reg(ctx, outputs[0]); + let ty = ctx.input_ty(insn, 0); + let inst = if ty_is_int(ty) { + Inst::VecDup { rd, rn, ty } + } else { + Inst::VecDupFromFpu { rd, rn, ty } + }; + ctx.emit(inst); + } + Opcode::Shuffle | Opcode::Vsplit | Opcode::Vconcat | Opcode::Vselect | Opcode::VanyTrue | Opcode::VallTrue - | Opcode::Splat | Opcode::Insertlane | Opcode::ScalarToVector | Opcode::Swizzle