Implements x64 SIMD loads for the new backend.

This commit is contained in:
Johnnie Birch
2020-12-03 08:32:48 -08:00
parent e33326f4e0
commit 51973aefbb
2 changed files with 81 additions and 8 deletions

View File

@@ -1764,6 +1764,18 @@ pub(crate) fn emit(
SseOpcode::Pabsb => (LegacyPrefixes::_66, 0x0F381C, 3), SseOpcode::Pabsb => (LegacyPrefixes::_66, 0x0F381C, 3),
SseOpcode::Pabsw => (LegacyPrefixes::_66, 0x0F381D, 3), SseOpcode::Pabsw => (LegacyPrefixes::_66, 0x0F381D, 3),
SseOpcode::Pabsd => (LegacyPrefixes::_66, 0x0F381E, 3), SseOpcode::Pabsd => (LegacyPrefixes::_66, 0x0F381E, 3),
SseOpcode::Pmovsxbd => (LegacyPrefixes::_66, 0x0F3821, 3),
SseOpcode::Pmovsxbw => (LegacyPrefixes::_66, 0x0F3820, 3),
SseOpcode::Pmovsxbq => (LegacyPrefixes::_66, 0x0F3822, 3),
SseOpcode::Pmovsxwd => (LegacyPrefixes::_66, 0x0F3823, 3),
SseOpcode::Pmovsxwq => (LegacyPrefixes::_66, 0x0F3824, 3),
SseOpcode::Pmovsxdq => (LegacyPrefixes::_66, 0x0F3825, 3),
SseOpcode::Pmovzxbd => (LegacyPrefixes::_66, 0x0F3831, 3),
SseOpcode::Pmovzxbw => (LegacyPrefixes::_66, 0x0F3830, 3),
SseOpcode::Pmovzxbq => (LegacyPrefixes::_66, 0x0F3832, 3),
SseOpcode::Pmovzxwd => (LegacyPrefixes::_66, 0x0F3833, 3),
SseOpcode::Pmovzxwq => (LegacyPrefixes::_66, 0x0F3834, 3),
SseOpcode::Pmovzxdq => (LegacyPrefixes::_66, 0x0F3835, 3),
SseOpcode::Sqrtps => (LegacyPrefixes::None, 0x0F51, 2), SseOpcode::Sqrtps => (LegacyPrefixes::None, 0x0F51, 2),
SseOpcode::Sqrtpd => (LegacyPrefixes::_66, 0x0F51, 2), SseOpcode::Sqrtpd => (LegacyPrefixes::_66, 0x0F51, 2),
SseOpcode::Sqrtss => (LegacyPrefixes::_F3, 0x0F51, 2), SseOpcode::Sqrtss => (LegacyPrefixes::_F3, 0x0F51, 2),

View File

@@ -3264,7 +3264,13 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
| Opcode::Uload16Complex | Opcode::Uload16Complex
| Opcode::Sload16Complex | Opcode::Sload16Complex
| Opcode::Uload32Complex | Opcode::Uload32Complex
| Opcode::Sload32Complex => { | Opcode::Sload32Complex
| Opcode::Sload8x8
| Opcode::Uload8x8
| Opcode::Sload16x4
| Opcode::Uload16x4
| Opcode::Sload32x2
| Opcode::Uload32x2 => {
let offset = ctx.data(insn).load_store_offset().unwrap(); let offset = ctx.data(insn).load_store_offset().unwrap();
let elem_ty = match op { let elem_ty = match op {
@@ -3279,6 +3285,18 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
| Opcode::Uload32 | Opcode::Uload32
| Opcode::Sload32Complex | Opcode::Sload32Complex
| Opcode::Uload32Complex => types::I32, | Opcode::Uload32Complex => types::I32,
Opcode::Sload8x8
| Opcode::Uload8x8
| Opcode::Sload8x8Complex
| Opcode::Uload8x8Complex => types::I8X8,
Opcode::Sload16x4
| Opcode::Uload16x4
| Opcode::Sload16x4Complex
| Opcode::Uload16x4Complex => types::I16X4,
Opcode::Sload32x2
| Opcode::Uload32x2
| Opcode::Sload32x2Complex
| Opcode::Uload32x2Complex => types::I32X2,
Opcode::Load | Opcode::LoadComplex => ctx.output_ty(insn, 0), Opcode::Load | Opcode::LoadComplex => ctx.output_ty(insn, 0),
_ => unimplemented!(), _ => unimplemented!(),
}; };
@@ -3291,7 +3309,13 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
| Opcode::Sload16 | Opcode::Sload16
| Opcode::Sload16Complex | Opcode::Sload16Complex
| Opcode::Sload32 | Opcode::Sload32
| Opcode::Sload32Complex => true, | Opcode::Sload32Complex
| Opcode::Sload8x8
| Opcode::Sload8x8Complex
| Opcode::Sload16x4
| Opcode::Sload16x4Complex
| Opcode::Sload32x2
| Opcode::Sload32x2Complex => true,
_ => false, _ => false,
}; };
@@ -3302,7 +3326,13 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
| Opcode::Uload16 | Opcode::Uload16
| Opcode::Sload16 | Opcode::Sload16
| Opcode::Uload32 | Opcode::Uload32
| Opcode::Sload32 => { | Opcode::Sload32
| Opcode::Sload8x8
| Opcode::Uload8x8
| Opcode::Sload16x4
| Opcode::Uload16x4
| Opcode::Sload32x2
| Opcode::Uload32x2 => {
assert_eq!(inputs.len(), 1, "only one input for load operands"); assert_eq!(inputs.len(), 1, "only one input for load operands");
lower_to_amode(ctx, inputs[0], offset) lower_to_amode(ctx, inputs[0], offset)
} }
@@ -3313,7 +3343,13 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
| Opcode::Uload16Complex | Opcode::Uload16Complex
| Opcode::Sload16Complex | Opcode::Sload16Complex
| Opcode::Uload32Complex | Opcode::Uload32Complex
| Opcode::Sload32Complex => { | Opcode::Sload32Complex
| Opcode::Sload8x8Complex
| Opcode::Uload8x8Complex
| Opcode::Sload16x4Complex
| Opcode::Uload16x4Complex
| Opcode::Sload32x2Complex
| Opcode::Uload32x2Complex => {
assert_eq!( assert_eq!(
inputs.len(), inputs.len(),
2, 2,
@@ -3325,12 +3361,12 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
let flags = ctx.memflags(insn).expect("load should have memflags"); let flags = ctx.memflags(insn).expect("load should have memflags");
Amode::imm_reg_reg_shift(offset as u32, base, index, shift).with_flags(flags) Amode::imm_reg_reg_shift(offset as u32, base, index, shift).with_flags(flags)
} }
_ => unreachable!(), _ => unreachable!(),
}; };
let dst = get_output_reg(ctx, outputs[0]); let dst = get_output_reg(ctx, outputs[0]);
let is_xmm = elem_ty.is_float() || elem_ty.is_vector(); let is_xmm = elem_ty.is_float() || elem_ty.is_vector();
match (sign_extend, is_xmm) { match (sign_extend, is_xmm) {
(true, false) => { (true, false) => {
// The load is sign-extended only when the output size is lower than 64 bits, // The load is sign-extended only when the output size is lower than 64 bits,
@@ -3350,15 +3386,40 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
ctx.emit(match elem_ty { ctx.emit(match elem_ty {
types::F32 => Inst::xmm_mov(SseOpcode::Movss, RegMem::mem(amode), dst), types::F32 => Inst::xmm_mov(SseOpcode::Movss, RegMem::mem(amode), dst),
types::F64 => Inst::xmm_mov(SseOpcode::Movsd, RegMem::mem(amode), dst), types::F64 => Inst::xmm_mov(SseOpcode::Movsd, RegMem::mem(amode), dst),
types::I8X8 => {
if sign_extend == true {
Inst::xmm_mov(SseOpcode::Pmovsxbw, RegMem::mem(amode), dst)
} else {
Inst::xmm_mov(SseOpcode::Pmovzxbw, RegMem::mem(amode), dst)
}
}
types::I16X4 => {
if sign_extend == true {
Inst::xmm_mov(SseOpcode::Pmovsxwd, RegMem::mem(amode), dst)
} else {
Inst::xmm_mov(SseOpcode::Pmovzxwd, RegMem::mem(amode), dst)
}
}
types::I32X2 => {
if sign_extend == true {
Inst::xmm_mov(SseOpcode::Pmovsxdq, RegMem::mem(amode), dst)
} else {
Inst::xmm_mov(SseOpcode::Pmovzxdq, RegMem::mem(amode), dst)
}
}
_ if elem_ty.is_vector() && elem_ty.bits() == 128 => { _ if elem_ty.is_vector() && elem_ty.bits() == 128 => {
Inst::xmm_mov(SseOpcode::Movups, RegMem::mem(amode), dst) Inst::xmm_mov(SseOpcode::Movups, RegMem::mem(amode), dst)
} // TODO Specialize for different types: MOVUPD, MOVDQU }
_ => unreachable!("unexpected type for load: {:?}", elem_ty), // TODO Specialize for different types: MOVUPD, MOVDQU
_ => unreachable!(
"unexpected type for load: {:?} - {:?}",
elem_ty,
elem_ty.bits()
),
}); });
} }
} }
} }
Opcode::Store Opcode::Store
| Opcode::Istore8 | Opcode::Istore8
| Opcode::Istore16 | Opcode::Istore16