machinst x64: allow use of vector-length types

This commit is contained in:
Andrew Brown
2020-07-24 12:31:28 -07:00
parent dc6220b87c
commit 77cc2f69c1
5 changed files with 54 additions and 19 deletions

View File

@@ -93,6 +93,7 @@ fn in_int_reg(ty: types::Type) -> bool {
/// Returns `true` when `ty` is one of the scalar float types (`F32`, `F64`) or when
/// `ty.is_vector()` holds; returns `false` for every other type.
fn in_vec_reg(ty: types::Type) -> bool {
    // Scalar floats are checked first; the vector query only runs when that test fails.
    let is_scalar_float = matches!(ty, types::F32 | types::F64);
    is_scalar_float || ty.is_vector()
}
@@ -365,7 +366,7 @@ impl ABIBody for X64ABIBody {
1 | 8 => Some(ExtMode::BQ),
16 => Some(ExtMode::WQ),
32 => Some(ExtMode::LQ),
64 => None,
64 | 128 => None,
_ => unreachable!(),
};

View File

@@ -226,7 +226,7 @@ impl ShowWithRRU for RegMemImm {
}
/// An operand which is either an integer Register or a value in Memory. This can denote an 8, 16,
/// 32 or 64 bit value.
/// 32, 64, or 128 bit value.
#[derive(Clone)]
pub enum RegMem {
Reg { reg: Reg },
@@ -330,8 +330,7 @@ pub(crate) enum InstructionSet {
SSE41,
}
/// Some scalar SSE operations requiring 2 operands r/m and r.
/// TODO: Below only includes scalar operations. To be seen if packed will be added here.
/// Some SSE operations requiring 2 operands r/m and r.
#[derive(Clone, Copy, PartialEq)]
pub enum SseOpcode {
Addss,
@@ -365,6 +364,10 @@ pub enum SseOpcode {
Movq,
Movss,
Movsd,
Movups,
Movupd,
Mulps,
Mulpd,
Mulss,
Mulsd,
Orps,
@@ -396,9 +399,11 @@ impl SseOpcode {
| SseOpcode::Cvttss2si
| SseOpcode::Divss
| SseOpcode::Maxss
| SseOpcode::Movaps
| SseOpcode::Minss
| SseOpcode::Movaps
| SseOpcode::Movss
| SseOpcode::Movups
| SseOpcode::Mulps
| SseOpcode::Mulss
| SseOpcode::Orps
| SseOpcode::Rcpss
@@ -425,6 +430,8 @@ impl SseOpcode {
| SseOpcode::Movd
| SseOpcode::Movq
| SseOpcode::Movsd
| SseOpcode::Movupd
| SseOpcode::Mulpd
| SseOpcode::Mulsd
| SseOpcode::Orpd
| SseOpcode::Sqrtsd
@@ -478,6 +485,10 @@ impl fmt::Debug for SseOpcode {
SseOpcode::Movq => "movq",
SseOpcode::Movss => "movss",
SseOpcode::Movsd => "movsd",
SseOpcode::Movups => "movups",
SseOpcode::Movupd => "movupd",
SseOpcode::Mulps => "mulps",
SseOpcode::Mulpd => "mulpd",
SseOpcode::Mulss => "mulss",
SseOpcode::Mulsd => "mulsd",
SseOpcode::Orpd => "orpd",

View File

@@ -1552,6 +1552,10 @@ pub(crate) fn emit(
SseOpcode::Movapd => (LegacyPrefix::_66, 0x0F28),
SseOpcode::Movsd => (LegacyPrefix::_F2, 0x0F10),
SseOpcode::Movss => (LegacyPrefix::_F3, 0x0F10),
SseOpcode::Movups => (LegacyPrefix::None, 0x0F10),
SseOpcode::Movupd => (LegacyPrefix::_66, 0x0F10),
SseOpcode::Sqrtps => (LegacyPrefix::None, 0x0F51),
SseOpcode::Sqrtpd => (LegacyPrefix::_66, 0x0F51),
SseOpcode::Sqrtss => (LegacyPrefix::_F3, 0x0F51),
SseOpcode::Sqrtsd => (LegacyPrefix::_F2, 0x0F51),
SseOpcode::Cvtss2sd => (LegacyPrefix::_F3, 0x0F5A),
@@ -1710,6 +1714,8 @@ pub(crate) fn emit(
let (prefix, opcode) = match op {
SseOpcode::Movss => (LegacyPrefix::_F3, 0x0F11),
SseOpcode::Movsd => (LegacyPrefix::_F2, 0x0F11),
SseOpcode::Movaps => (LegacyPrefix::None, 0x0F29),
SseOpcode::Movups => (LegacyPrefix::None, 0x0F11),
_ => unimplemented!("Opcode {:?} not implemented", op),
};
let dst = &dst.finalize(state);

View File

@@ -1921,6 +1921,10 @@ impl MachInst for Inst {
RegClass::V128 => match ty {
F32 => Inst::xmm_mov(SseOpcode::Movss, RegMem::reg(src_reg), dst_reg, None),
F64 => Inst::xmm_mov(SseOpcode::Movsd, RegMem::reg(src_reg), dst_reg, None),
_ if ty.is_vector() && ty.bits() == 128 => {
// TODO Specialize this move for different types: MOVUPD, MOVDQU, etc.
Inst::xmm_mov(SseOpcode::Movups, RegMem::reg(src_reg), dst_reg, None)
}
_ => panic!("unexpected type {:?} in gen_move of regclass V128", ty),
},
_ => panic!("gen_move(x64): unhandled regclass"),
@@ -1942,7 +1946,8 @@ impl MachInst for Inst {
fn rc_for_type(ty: Type) -> CodegenResult<RegClass> {
match ty {
I8 | I16 | I32 | I64 | B1 | B8 | B16 | B32 | B64 | R32 | R64 => Ok(RegClass::I64),
F32 | F64 | I128 | B128 => Ok(RegClass::V128),
F32 | F64 => Ok(RegClass::V128),
_ if ty.bits() == 128 => Ok(RegClass::V128),
IFLAGS | FFLAGS => Ok(RegClass::I64),
_ => Err(CodegenError::Unsupported(format!(
"Unexpected SSA-value type: {}",

View File

@@ -1475,8 +1475,6 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
_ => false,
};
let is_float = is_float_ty(elem_ty);
let addr = match op {
Opcode::Load
| Opcode::Uload8
@@ -1513,7 +1511,8 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
let srcloc = Some(ctx.srcloc(insn));
let dst = output_to_reg(ctx, outputs[0]);
match (sign_extend, is_float) {
let is_xmm = elem_ty.is_float() || elem_ty.is_vector();
match (sign_extend, is_xmm) {
(true, false) => {
// The load is sign-extended only when the output size is lower than 64 bits,
// so ext-mode is defined in this case.
@@ -1542,6 +1541,9 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
ctx.emit(match elem_ty {
F32 => Inst::xmm_mov(SseOpcode::Movss, RegMem::mem(addr), dst, srcloc),
F64 => Inst::xmm_mov(SseOpcode::Movsd, RegMem::mem(addr), dst, srcloc),
_ if elem_ty.is_vector() && elem_ty.bits() == 128 => {
Inst::xmm_mov(SseOpcode::Movups, RegMem::mem(addr), dst, srcloc)
} // TODO Specialize for different types: MOVUPD, MOVDQU
_ => unreachable!("unexpected type for load: {:?}", elem_ty),
});
}
@@ -1565,7 +1567,6 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
Opcode::Store | Opcode::StoreComplex => ctx.input_ty(insn, 0),
_ => unreachable!(),
};
let is_float = is_float_ty(elem_ty);
let addr = match op {
Opcode::Store | Opcode::Istore8 | Opcode::Istore16 | Opcode::Istore32 => {
@@ -1599,15 +1600,15 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
let srcloc = Some(ctx.srcloc(insn));
if is_float {
ctx.emit(match elem_ty {
F32 => Inst::xmm_mov_r_m(SseOpcode::Movss, src, addr, srcloc),
F64 => Inst::xmm_mov_r_m(SseOpcode::Movsd, src, addr, srcloc),
_ => panic!("unexpected type for store {:?}", elem_ty),
});
} else {
ctx.emit(Inst::mov_r_m(elem_ty.bytes() as u8, src, addr, srcloc));
}
ctx.emit(match elem_ty {
F32 => Inst::xmm_mov_r_m(SseOpcode::Movss, src, addr, srcloc),
F64 => Inst::xmm_mov_r_m(SseOpcode::Movsd, src, addr, srcloc),
_ if elem_ty.is_vector() && elem_ty.bits() == 128 => {
// TODO Specialize for different types: MOVUPD, MOVDQU, etc.
Inst::xmm_mov_r_m(SseOpcode::Movups, src, addr, srcloc)
}
_ => Inst::mov_r_m(elem_ty.bytes() as u8, src, addr, srcloc),
});
}
Opcode::FuncAddr => {
@@ -1815,6 +1816,17 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
));
}
Opcode::RawBitcast => {
// A raw_bitcast is just a mechanism for correcting the type of V128 values (see
// https://github.com/bytecodealliance/wasmtime/issues/1147). As such, this IR
// instruction should emit no machine code but a move is necessary to give the register
// allocator a definition for the output virtual register.
let src = input_to_reg(ctx, inputs[0]);
let dst = output_to_reg(ctx, outputs[0]);
let ty = ty.unwrap();
ctx.emit(Inst::gen_move(dst, src, ty));
}
Opcode::IaddImm
| Opcode::ImulImm
| Opcode::UdivImm