Implements x64 SIMD loads for the new backend.
This commit is contained in:
@@ -1764,6 +1764,18 @@ pub(crate) fn emit(
|
|||||||
SseOpcode::Pabsb => (LegacyPrefixes::_66, 0x0F381C, 3),
|
SseOpcode::Pabsb => (LegacyPrefixes::_66, 0x0F381C, 3),
|
||||||
SseOpcode::Pabsw => (LegacyPrefixes::_66, 0x0F381D, 3),
|
SseOpcode::Pabsw => (LegacyPrefixes::_66, 0x0F381D, 3),
|
||||||
SseOpcode::Pabsd => (LegacyPrefixes::_66, 0x0F381E, 3),
|
SseOpcode::Pabsd => (LegacyPrefixes::_66, 0x0F381E, 3),
|
||||||
|
SseOpcode::Pmovsxbd => (LegacyPrefixes::_66, 0x0F3821, 3),
|
||||||
|
SseOpcode::Pmovsxbw => (LegacyPrefixes::_66, 0x0F3820, 3),
|
||||||
|
SseOpcode::Pmovsxbq => (LegacyPrefixes::_66, 0x0F3822, 3),
|
||||||
|
SseOpcode::Pmovsxwd => (LegacyPrefixes::_66, 0x0F3823, 3),
|
||||||
|
SseOpcode::Pmovsxwq => (LegacyPrefixes::_66, 0x0F3824, 3),
|
||||||
|
SseOpcode::Pmovsxdq => (LegacyPrefixes::_66, 0x0F3825, 3),
|
||||||
|
SseOpcode::Pmovzxbd => (LegacyPrefixes::_66, 0x0F3831, 3),
|
||||||
|
SseOpcode::Pmovzxbw => (LegacyPrefixes::_66, 0x0F3830, 3),
|
||||||
|
SseOpcode::Pmovzxbq => (LegacyPrefixes::_66, 0x0F3832, 3),
|
||||||
|
SseOpcode::Pmovzxwd => (LegacyPrefixes::_66, 0x0F3833, 3),
|
||||||
|
SseOpcode::Pmovzxwq => (LegacyPrefixes::_66, 0x0F3834, 3),
|
||||||
|
SseOpcode::Pmovzxdq => (LegacyPrefixes::_66, 0x0F3835, 3),
|
||||||
SseOpcode::Sqrtps => (LegacyPrefixes::None, 0x0F51, 2),
|
SseOpcode::Sqrtps => (LegacyPrefixes::None, 0x0F51, 2),
|
||||||
SseOpcode::Sqrtpd => (LegacyPrefixes::_66, 0x0F51, 2),
|
SseOpcode::Sqrtpd => (LegacyPrefixes::_66, 0x0F51, 2),
|
||||||
SseOpcode::Sqrtss => (LegacyPrefixes::_F3, 0x0F51, 2),
|
SseOpcode::Sqrtss => (LegacyPrefixes::_F3, 0x0F51, 2),
|
||||||
|
|||||||
@@ -3264,7 +3264,13 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
|
|||||||
| Opcode::Uload16Complex
|
| Opcode::Uload16Complex
|
||||||
| Opcode::Sload16Complex
|
| Opcode::Sload16Complex
|
||||||
| Opcode::Uload32Complex
|
| Opcode::Uload32Complex
|
||||||
| Opcode::Sload32Complex => {
|
| Opcode::Sload32Complex
|
||||||
|
| Opcode::Sload8x8
|
||||||
|
| Opcode::Uload8x8
|
||||||
|
| Opcode::Sload16x4
|
||||||
|
| Opcode::Uload16x4
|
||||||
|
| Opcode::Sload32x2
|
||||||
|
| Opcode::Uload32x2 => {
|
||||||
let offset = ctx.data(insn).load_store_offset().unwrap();
|
let offset = ctx.data(insn).load_store_offset().unwrap();
|
||||||
|
|
||||||
let elem_ty = match op {
|
let elem_ty = match op {
|
||||||
@@ -3279,6 +3285,18 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
|
|||||||
| Opcode::Uload32
|
| Opcode::Uload32
|
||||||
| Opcode::Sload32Complex
|
| Opcode::Sload32Complex
|
||||||
| Opcode::Uload32Complex => types::I32,
|
| Opcode::Uload32Complex => types::I32,
|
||||||
|
Opcode::Sload8x8
|
||||||
|
| Opcode::Uload8x8
|
||||||
|
| Opcode::Sload8x8Complex
|
||||||
|
| Opcode::Uload8x8Complex => types::I8X8,
|
||||||
|
Opcode::Sload16x4
|
||||||
|
| Opcode::Uload16x4
|
||||||
|
| Opcode::Sload16x4Complex
|
||||||
|
| Opcode::Uload16x4Complex => types::I16X4,
|
||||||
|
Opcode::Sload32x2
|
||||||
|
| Opcode::Uload32x2
|
||||||
|
| Opcode::Sload32x2Complex
|
||||||
|
| Opcode::Uload32x2Complex => types::I32X2,
|
||||||
Opcode::Load | Opcode::LoadComplex => ctx.output_ty(insn, 0),
|
Opcode::Load | Opcode::LoadComplex => ctx.output_ty(insn, 0),
|
||||||
_ => unimplemented!(),
|
_ => unimplemented!(),
|
||||||
};
|
};
|
||||||
@@ -3291,7 +3309,13 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
|
|||||||
| Opcode::Sload16
|
| Opcode::Sload16
|
||||||
| Opcode::Sload16Complex
|
| Opcode::Sload16Complex
|
||||||
| Opcode::Sload32
|
| Opcode::Sload32
|
||||||
| Opcode::Sload32Complex => true,
|
| Opcode::Sload32Complex
|
||||||
|
| Opcode::Sload8x8
|
||||||
|
| Opcode::Sload8x8Complex
|
||||||
|
| Opcode::Sload16x4
|
||||||
|
| Opcode::Sload16x4Complex
|
||||||
|
| Opcode::Sload32x2
|
||||||
|
| Opcode::Sload32x2Complex => true,
|
||||||
_ => false,
|
_ => false,
|
||||||
};
|
};
|
||||||
|
|
||||||
@@ -3302,7 +3326,13 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
|
|||||||
| Opcode::Uload16
|
| Opcode::Uload16
|
||||||
| Opcode::Sload16
|
| Opcode::Sload16
|
||||||
| Opcode::Uload32
|
| Opcode::Uload32
|
||||||
| Opcode::Sload32 => {
|
| Opcode::Sload32
|
||||||
|
| Opcode::Sload8x8
|
||||||
|
| Opcode::Uload8x8
|
||||||
|
| Opcode::Sload16x4
|
||||||
|
| Opcode::Uload16x4
|
||||||
|
| Opcode::Sload32x2
|
||||||
|
| Opcode::Uload32x2 => {
|
||||||
assert_eq!(inputs.len(), 1, "only one input for load operands");
|
assert_eq!(inputs.len(), 1, "only one input for load operands");
|
||||||
lower_to_amode(ctx, inputs[0], offset)
|
lower_to_amode(ctx, inputs[0], offset)
|
||||||
}
|
}
|
||||||
@@ -3313,7 +3343,13 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
|
|||||||
| Opcode::Uload16Complex
|
| Opcode::Uload16Complex
|
||||||
| Opcode::Sload16Complex
|
| Opcode::Sload16Complex
|
||||||
| Opcode::Uload32Complex
|
| Opcode::Uload32Complex
|
||||||
| Opcode::Sload32Complex => {
|
| Opcode::Sload32Complex
|
||||||
|
| Opcode::Sload8x8Complex
|
||||||
|
| Opcode::Uload8x8Complex
|
||||||
|
| Opcode::Sload16x4Complex
|
||||||
|
| Opcode::Uload16x4Complex
|
||||||
|
| Opcode::Sload32x2Complex
|
||||||
|
| Opcode::Uload32x2Complex => {
|
||||||
assert_eq!(
|
assert_eq!(
|
||||||
inputs.len(),
|
inputs.len(),
|
||||||
2,
|
2,
|
||||||
@@ -3325,12 +3361,12 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
|
|||||||
let flags = ctx.memflags(insn).expect("load should have memflags");
|
let flags = ctx.memflags(insn).expect("load should have memflags");
|
||||||
Amode::imm_reg_reg_shift(offset as u32, base, index, shift).with_flags(flags)
|
Amode::imm_reg_reg_shift(offset as u32, base, index, shift).with_flags(flags)
|
||||||
}
|
}
|
||||||
|
|
||||||
_ => unreachable!(),
|
_ => unreachable!(),
|
||||||
};
|
};
|
||||||
|
|
||||||
let dst = get_output_reg(ctx, outputs[0]);
|
let dst = get_output_reg(ctx, outputs[0]);
|
||||||
let is_xmm = elem_ty.is_float() || elem_ty.is_vector();
|
let is_xmm = elem_ty.is_float() || elem_ty.is_vector();
|
||||||
|
|
||||||
match (sign_extend, is_xmm) {
|
match (sign_extend, is_xmm) {
|
||||||
(true, false) => {
|
(true, false) => {
|
||||||
// The load is sign-extended only when the output size is lower than 64 bits,
|
// The load is sign-extended only when the output size is lower than 64 bits,
|
||||||
@@ -3350,15 +3386,40 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
|
|||||||
ctx.emit(match elem_ty {
|
ctx.emit(match elem_ty {
|
||||||
types::F32 => Inst::xmm_mov(SseOpcode::Movss, RegMem::mem(amode), dst),
|
types::F32 => Inst::xmm_mov(SseOpcode::Movss, RegMem::mem(amode), dst),
|
||||||
types::F64 => Inst::xmm_mov(SseOpcode::Movsd, RegMem::mem(amode), dst),
|
types::F64 => Inst::xmm_mov(SseOpcode::Movsd, RegMem::mem(amode), dst),
|
||||||
|
types::I8X8 => {
|
||||||
|
if sign_extend == true {
|
||||||
|
Inst::xmm_mov(SseOpcode::Pmovsxbw, RegMem::mem(amode), dst)
|
||||||
|
} else {
|
||||||
|
Inst::xmm_mov(SseOpcode::Pmovzxbw, RegMem::mem(amode), dst)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
types::I16X4 => {
|
||||||
|
if sign_extend == true {
|
||||||
|
Inst::xmm_mov(SseOpcode::Pmovsxwd, RegMem::mem(amode), dst)
|
||||||
|
} else {
|
||||||
|
Inst::xmm_mov(SseOpcode::Pmovzxwd, RegMem::mem(amode), dst)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
types::I32X2 => {
|
||||||
|
if sign_extend == true {
|
||||||
|
Inst::xmm_mov(SseOpcode::Pmovsxdq, RegMem::mem(amode), dst)
|
||||||
|
} else {
|
||||||
|
Inst::xmm_mov(SseOpcode::Pmovzxdq, RegMem::mem(amode), dst)
|
||||||
|
}
|
||||||
|
}
|
||||||
_ if elem_ty.is_vector() && elem_ty.bits() == 128 => {
|
_ if elem_ty.is_vector() && elem_ty.bits() == 128 => {
|
||||||
Inst::xmm_mov(SseOpcode::Movups, RegMem::mem(amode), dst)
|
Inst::xmm_mov(SseOpcode::Movups, RegMem::mem(amode), dst)
|
||||||
} // TODO Specialize for different types: MOVUPD, MOVDQU
|
}
|
||||||
_ => unreachable!("unexpected type for load: {:?}", elem_ty),
|
// TODO Specialize for different types: MOVUPD, MOVDQU
|
||||||
|
_ => unreachable!(
|
||||||
|
"unexpected type for load: {:?} - {:?}",
|
||||||
|
elem_ty,
|
||||||
|
elem_ty.bits()
|
||||||
|
),
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
Opcode::Store
|
Opcode::Store
|
||||||
| Opcode::Istore8
|
| Opcode::Istore8
|
||||||
| Opcode::Istore16
|
| Opcode::Istore16
|
||||||
|
|||||||
Reference in New Issue
Block a user