From 51973aefbb4cb9fd70b8038886919f4771cfa60e Mon Sep 17 00:00:00 2001 From: Johnnie Birch <45402135+jlb6740@users.noreply.github.com> Date: Thu, 3 Dec 2020 08:32:48 -0800 Subject: [PATCH] Implements x64 SIMD loads for the new backend. --- cranelift/codegen/src/isa/x64/inst/emit.rs | 12 ++++ cranelift/codegen/src/isa/x64/lower.rs | 77 +++++++++++++++++++--- 2 files changed, 81 insertions(+), 8 deletions(-) diff --git a/cranelift/codegen/src/isa/x64/inst/emit.rs b/cranelift/codegen/src/isa/x64/inst/emit.rs index 401b8aad08..bf15513665 100644 --- a/cranelift/codegen/src/isa/x64/inst/emit.rs +++ b/cranelift/codegen/src/isa/x64/inst/emit.rs @@ -1764,6 +1764,18 @@ pub(crate) fn emit( SseOpcode::Pabsb => (LegacyPrefixes::_66, 0x0F381C, 3), SseOpcode::Pabsw => (LegacyPrefixes::_66, 0x0F381D, 3), SseOpcode::Pabsd => (LegacyPrefixes::_66, 0x0F381E, 3), + SseOpcode::Pmovsxbd => (LegacyPrefixes::_66, 0x0F3821, 3), + SseOpcode::Pmovsxbw => (LegacyPrefixes::_66, 0x0F3820, 3), + SseOpcode::Pmovsxbq => (LegacyPrefixes::_66, 0x0F3822, 3), + SseOpcode::Pmovsxwd => (LegacyPrefixes::_66, 0x0F3823, 3), + SseOpcode::Pmovsxwq => (LegacyPrefixes::_66, 0x0F3824, 3), + SseOpcode::Pmovsxdq => (LegacyPrefixes::_66, 0x0F3825, 3), + SseOpcode::Pmovzxbd => (LegacyPrefixes::_66, 0x0F3831, 3), + SseOpcode::Pmovzxbw => (LegacyPrefixes::_66, 0x0F3830, 3), + SseOpcode::Pmovzxbq => (LegacyPrefixes::_66, 0x0F3832, 3), + SseOpcode::Pmovzxwd => (LegacyPrefixes::_66, 0x0F3833, 3), + SseOpcode::Pmovzxwq => (LegacyPrefixes::_66, 0x0F3834, 3), + SseOpcode::Pmovzxdq => (LegacyPrefixes::_66, 0x0F3835, 3), SseOpcode::Sqrtps => (LegacyPrefixes::None, 0x0F51, 2), SseOpcode::Sqrtpd => (LegacyPrefixes::_66, 0x0F51, 2), SseOpcode::Sqrtss => (LegacyPrefixes::_F3, 0x0F51, 2), diff --git a/cranelift/codegen/src/isa/x64/lower.rs b/cranelift/codegen/src/isa/x64/lower.rs index a01e35bc0d..21ed356cc1 100644 --- a/cranelift/codegen/src/isa/x64/lower.rs +++ b/cranelift/codegen/src/isa/x64/lower.rs @@ -3264,7 +3264,13 @@ fn lower_insn_to_regs>( | Opcode::Uload16Complex | Opcode::Sload16Complex | Opcode::Uload32Complex - | Opcode::Sload32Complex => { + | Opcode::Sload32Complex + | Opcode::Sload8x8 + | Opcode::Uload8x8 + | Opcode::Sload16x4 + | Opcode::Uload16x4 + | Opcode::Sload32x2 + | Opcode::Uload32x2 => { let offset = ctx.data(insn).load_store_offset().unwrap(); let elem_ty = match op { @@ -3279,6 +3285,18 @@ fn lower_insn_to_regs>( | Opcode::Uload32 | Opcode::Sload32Complex | Opcode::Uload32Complex => types::I32, + Opcode::Sload8x8 + | Opcode::Uload8x8 + | Opcode::Sload8x8Complex + | Opcode::Uload8x8Complex => types::I8X8, + Opcode::Sload16x4 + | Opcode::Uload16x4 + | Opcode::Sload16x4Complex + | Opcode::Uload16x4Complex => types::I16X4, + Opcode::Sload32x2 + | Opcode::Uload32x2 + | Opcode::Sload32x2Complex + | Opcode::Uload32x2Complex => types::I32X2, Opcode::Load | Opcode::LoadComplex => ctx.output_ty(insn, 0), _ => unimplemented!(), }; @@ -3291,7 +3309,13 @@ fn lower_insn_to_regs>( | Opcode::Sload16 | Opcode::Sload16Complex | Opcode::Sload32 - | Opcode::Sload32Complex => true, + | Opcode::Sload32Complex + | Opcode::Sload8x8 + | Opcode::Sload8x8Complex + | Opcode::Sload16x4 + | Opcode::Sload16x4Complex + | Opcode::Sload32x2 + | Opcode::Sload32x2Complex => true, _ => false, }; @@ -3302,7 +3326,13 @@ fn lower_insn_to_regs>( | Opcode::Uload16 | Opcode::Sload16 | Opcode::Uload32 - | Opcode::Sload32 => { + | Opcode::Sload32 + | Opcode::Sload8x8 + | Opcode::Uload8x8 + | Opcode::Sload16x4 + | Opcode::Uload16x4 + | Opcode::Sload32x2 + | Opcode::Uload32x2 => { assert_eq!(inputs.len(), 1, "only one input for load operands"); lower_to_amode(ctx, inputs[0], offset) } @@ -3313,7 +3343,13 @@ fn lower_insn_to_regs>( | Opcode::Uload16Complex | Opcode::Sload16Complex | Opcode::Uload32Complex - | Opcode::Sload32Complex => { + | Opcode::Sload32Complex + | Opcode::Sload8x8Complex + | Opcode::Uload8x8Complex + | Opcode::Sload16x4Complex + | Opcode::Uload16x4Complex + | Opcode::Sload32x2Complex + | Opcode::Uload32x2Complex => { assert_eq!( inputs.len(), 2, @@ -3325,12 +3361,12 @@ fn lower_insn_to_regs>( let flags = ctx.memflags(insn).expect("load should have memflags"); Amode::imm_reg_reg_shift(offset as u32, base, index, shift).with_flags(flags) } - _ => unreachable!(), }; let dst = get_output_reg(ctx, outputs[0]); let is_xmm = elem_ty.is_float() || elem_ty.is_vector(); + match (sign_extend, is_xmm) { (true, false) => { // The load is sign-extended only when the output size is lower than 64 bits, @@ -3350,15 +3386,40 @@ fn lower_insn_to_regs>( ctx.emit(match elem_ty { types::F32 => Inst::xmm_mov(SseOpcode::Movss, RegMem::mem(amode), dst), types::F64 => Inst::xmm_mov(SseOpcode::Movsd, RegMem::mem(amode), dst), + types::I8X8 => { + if sign_extend == true { + Inst::xmm_mov(SseOpcode::Pmovsxbw, RegMem::mem(amode), dst) + } else { + Inst::xmm_mov(SseOpcode::Pmovzxbw, RegMem::mem(amode), dst) + } + } + types::I16X4 => { + if sign_extend == true { + Inst::xmm_mov(SseOpcode::Pmovsxwd, RegMem::mem(amode), dst) + } else { + Inst::xmm_mov(SseOpcode::Pmovzxwd, RegMem::mem(amode), dst) + } + } + types::I32X2 => { + if sign_extend == true { + Inst::xmm_mov(SseOpcode::Pmovsxdq, RegMem::mem(amode), dst) + } else { + Inst::xmm_mov(SseOpcode::Pmovzxdq, RegMem::mem(amode), dst) + } + } _ if elem_ty.is_vector() && elem_ty.bits() == 128 => { Inst::xmm_mov(SseOpcode::Movups, RegMem::mem(amode), dst) - } // TODO Specialize for different types: MOVUPD, MOVDQU - _ => unreachable!("unexpected type for load: {:?}", elem_ty), + } + // TODO Specialize for different types: MOVUPD, MOVDQU + _ => unreachable!( + "unexpected type for load: {:?} - {:?}", + elem_ty, + elem_ty.bits() + ), }); } } } - Opcode::Store | Opcode::Istore8 | Opcode::Istore16