machinst x64: allow use of vector-length types

This commit is contained in:
Andrew Brown
2020-07-24 12:31:28 -07:00
parent dc6220b87c
commit 77cc2f69c1
5 changed files with 54 additions and 19 deletions

View File

@@ -93,6 +93,7 @@ fn in_int_reg(ty: types::Type) -> bool {
fn in_vec_reg(ty: types::Type) -> bool { fn in_vec_reg(ty: types::Type) -> bool {
match ty { match ty {
types::F32 | types::F64 => true, types::F32 | types::F64 => true,
_ if ty.is_vector() => true,
_ => false, _ => false,
} }
} }
@@ -365,7 +366,7 @@ impl ABIBody for X64ABIBody {
1 | 8 => Some(ExtMode::BQ), 1 | 8 => Some(ExtMode::BQ),
16 => Some(ExtMode::WQ), 16 => Some(ExtMode::WQ),
32 => Some(ExtMode::LQ), 32 => Some(ExtMode::LQ),
64 => None, 64 | 128 => None,
_ => unreachable!(), _ => unreachable!(),
}; };

View File

@@ -226,7 +226,7 @@ impl ShowWithRRU for RegMemImm {
} }
/// An operand which is either an integer Register or a value in Memory. This can denote an 8, 16, /// An operand which is either an integer Register or a value in Memory. This can denote an 8, 16,
/// 32 or 64 bit value. /// 32, 64, or 128 bit value.
#[derive(Clone)] #[derive(Clone)]
pub enum RegMem { pub enum RegMem {
Reg { reg: Reg }, Reg { reg: Reg },
@@ -330,8 +330,7 @@ pub(crate) enum InstructionSet {
SSE41, SSE41,
} }
/// Some scalar SSE operations requiring 2 operands r/m and r. /// Some SSE operations requiring 2 operands r/m and r.
/// TODO: Below only includes scalar operations. To be seen if packed will be added here.
#[derive(Clone, Copy, PartialEq)] #[derive(Clone, Copy, PartialEq)]
pub enum SseOpcode { pub enum SseOpcode {
Addss, Addss,
@@ -365,6 +364,10 @@ pub enum SseOpcode {
Movq, Movq,
Movss, Movss,
Movsd, Movsd,
Movups,
Movupd,
Mulps,
Mulpd,
Mulss, Mulss,
Mulsd, Mulsd,
Orps, Orps,
@@ -396,9 +399,11 @@ impl SseOpcode {
| SseOpcode::Cvttss2si | SseOpcode::Cvttss2si
| SseOpcode::Divss | SseOpcode::Divss
| SseOpcode::Maxss | SseOpcode::Maxss
| SseOpcode::Movaps
| SseOpcode::Minss | SseOpcode::Minss
| SseOpcode::Movaps
| SseOpcode::Movss | SseOpcode::Movss
| SseOpcode::Movups
| SseOpcode::Mulps
| SseOpcode::Mulss | SseOpcode::Mulss
| SseOpcode::Orps | SseOpcode::Orps
| SseOpcode::Rcpss | SseOpcode::Rcpss
@@ -425,6 +430,8 @@ impl SseOpcode {
| SseOpcode::Movd | SseOpcode::Movd
| SseOpcode::Movq | SseOpcode::Movq
| SseOpcode::Movsd | SseOpcode::Movsd
| SseOpcode::Movupd
| SseOpcode::Mulpd
| SseOpcode::Mulsd | SseOpcode::Mulsd
| SseOpcode::Orpd | SseOpcode::Orpd
| SseOpcode::Sqrtsd | SseOpcode::Sqrtsd
@@ -478,6 +485,10 @@ impl fmt::Debug for SseOpcode {
SseOpcode::Movq => "movq", SseOpcode::Movq => "movq",
SseOpcode::Movss => "movss", SseOpcode::Movss => "movss",
SseOpcode::Movsd => "movsd", SseOpcode::Movsd => "movsd",
SseOpcode::Movups => "movups",
SseOpcode::Movupd => "movupd",
SseOpcode::Mulps => "mulps",
SseOpcode::Mulpd => "mulpd",
SseOpcode::Mulss => "mulss", SseOpcode::Mulss => "mulss",
SseOpcode::Mulsd => "mulsd", SseOpcode::Mulsd => "mulsd",
SseOpcode::Orpd => "orpd", SseOpcode::Orpd => "orpd",

View File

@@ -1552,6 +1552,10 @@ pub(crate) fn emit(
SseOpcode::Movapd => (LegacyPrefix::_66, 0x0F28), SseOpcode::Movapd => (LegacyPrefix::_66, 0x0F28),
SseOpcode::Movsd => (LegacyPrefix::_F2, 0x0F10), SseOpcode::Movsd => (LegacyPrefix::_F2, 0x0F10),
SseOpcode::Movss => (LegacyPrefix::_F3, 0x0F10), SseOpcode::Movss => (LegacyPrefix::_F3, 0x0F10),
SseOpcode::Movups => (LegacyPrefix::None, 0x0F10),
SseOpcode::Movupd => (LegacyPrefix::_66, 0x0F10),
SseOpcode::Sqrtps => (LegacyPrefix::None, 0x0F51),
SseOpcode::Sqrtpd => (LegacyPrefix::_66, 0x0F51),
SseOpcode::Sqrtss => (LegacyPrefix::_F3, 0x0F51), SseOpcode::Sqrtss => (LegacyPrefix::_F3, 0x0F51),
SseOpcode::Sqrtsd => (LegacyPrefix::_F2, 0x0F51), SseOpcode::Sqrtsd => (LegacyPrefix::_F2, 0x0F51),
SseOpcode::Cvtss2sd => (LegacyPrefix::_F3, 0x0F5A), SseOpcode::Cvtss2sd => (LegacyPrefix::_F3, 0x0F5A),
@@ -1710,6 +1714,8 @@ pub(crate) fn emit(
let (prefix, opcode) = match op { let (prefix, opcode) = match op {
SseOpcode::Movss => (LegacyPrefix::_F3, 0x0F11), SseOpcode::Movss => (LegacyPrefix::_F3, 0x0F11),
SseOpcode::Movsd => (LegacyPrefix::_F2, 0x0F11), SseOpcode::Movsd => (LegacyPrefix::_F2, 0x0F11),
SseOpcode::Movaps => (LegacyPrefix::None, 0x0F29),
SseOpcode::Movups => (LegacyPrefix::None, 0x0F11),
_ => unimplemented!("Opcode {:?} not implemented", op), _ => unimplemented!("Opcode {:?} not implemented", op),
}; };
let dst = &dst.finalize(state); let dst = &dst.finalize(state);

View File

@@ -1921,6 +1921,10 @@ impl MachInst for Inst {
RegClass::V128 => match ty { RegClass::V128 => match ty {
F32 => Inst::xmm_mov(SseOpcode::Movss, RegMem::reg(src_reg), dst_reg, None), F32 => Inst::xmm_mov(SseOpcode::Movss, RegMem::reg(src_reg), dst_reg, None),
F64 => Inst::xmm_mov(SseOpcode::Movsd, RegMem::reg(src_reg), dst_reg, None), F64 => Inst::xmm_mov(SseOpcode::Movsd, RegMem::reg(src_reg), dst_reg, None),
_ if ty.is_vector() && ty.bits() == 128 => {
// TODO Specialize this move for different types: MOVUPD, MOVDQU, etc.
Inst::xmm_mov(SseOpcode::Movups, RegMem::reg(src_reg), dst_reg, None)
}
_ => panic!("unexpected type {:?} in gen_move of regclass V128", ty), _ => panic!("unexpected type {:?} in gen_move of regclass V128", ty),
}, },
_ => panic!("gen_move(x64): unhandled regclass"), _ => panic!("gen_move(x64): unhandled regclass"),
@@ -1942,7 +1946,8 @@ impl MachInst for Inst {
fn rc_for_type(ty: Type) -> CodegenResult<RegClass> { fn rc_for_type(ty: Type) -> CodegenResult<RegClass> {
match ty { match ty {
I8 | I16 | I32 | I64 | B1 | B8 | B16 | B32 | B64 | R32 | R64 => Ok(RegClass::I64), I8 | I16 | I32 | I64 | B1 | B8 | B16 | B32 | B64 | R32 | R64 => Ok(RegClass::I64),
F32 | F64 | I128 | B128 => Ok(RegClass::V128), F32 | F64 => Ok(RegClass::V128),
_ if ty.bits() == 128 => Ok(RegClass::V128),
IFLAGS | FFLAGS => Ok(RegClass::I64), IFLAGS | FFLAGS => Ok(RegClass::I64),
_ => Err(CodegenError::Unsupported(format!( _ => Err(CodegenError::Unsupported(format!(
"Unexpected SSA-value type: {}", "Unexpected SSA-value type: {}",

View File

@@ -1475,8 +1475,6 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
_ => false, _ => false,
}; };
let is_float = is_float_ty(elem_ty);
let addr = match op { let addr = match op {
Opcode::Load Opcode::Load
| Opcode::Uload8 | Opcode::Uload8
@@ -1513,7 +1511,8 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
let srcloc = Some(ctx.srcloc(insn)); let srcloc = Some(ctx.srcloc(insn));
let dst = output_to_reg(ctx, outputs[0]); let dst = output_to_reg(ctx, outputs[0]);
match (sign_extend, is_float) { let is_xmm = elem_ty.is_float() || elem_ty.is_vector();
match (sign_extend, is_xmm) {
(true, false) => { (true, false) => {
// The load is sign-extended only when the output size is lower than 64 bits, // The load is sign-extended only when the output size is lower than 64 bits,
// so ext-mode is defined in this case. // so ext-mode is defined in this case.
@@ -1542,6 +1541,9 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
ctx.emit(match elem_ty { ctx.emit(match elem_ty {
F32 => Inst::xmm_mov(SseOpcode::Movss, RegMem::mem(addr), dst, srcloc), F32 => Inst::xmm_mov(SseOpcode::Movss, RegMem::mem(addr), dst, srcloc),
F64 => Inst::xmm_mov(SseOpcode::Movsd, RegMem::mem(addr), dst, srcloc), F64 => Inst::xmm_mov(SseOpcode::Movsd, RegMem::mem(addr), dst, srcloc),
_ if elem_ty.is_vector() && elem_ty.bits() == 128 => {
Inst::xmm_mov(SseOpcode::Movups, RegMem::mem(addr), dst, srcloc)
} // TODO Specialize for different types: MOVUPD, MOVDQU
_ => unreachable!("unexpected type for load: {:?}", elem_ty), _ => unreachable!("unexpected type for load: {:?}", elem_ty),
}); });
} }
@@ -1565,7 +1567,6 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
Opcode::Store | Opcode::StoreComplex => ctx.input_ty(insn, 0), Opcode::Store | Opcode::StoreComplex => ctx.input_ty(insn, 0),
_ => unreachable!(), _ => unreachable!(),
}; };
let is_float = is_float_ty(elem_ty);
let addr = match op { let addr = match op {
Opcode::Store | Opcode::Istore8 | Opcode::Istore16 | Opcode::Istore32 => { Opcode::Store | Opcode::Istore8 | Opcode::Istore16 | Opcode::Istore32 => {
@@ -1599,15 +1600,15 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
let srcloc = Some(ctx.srcloc(insn)); let srcloc = Some(ctx.srcloc(insn));
if is_float { ctx.emit(match elem_ty {
ctx.emit(match elem_ty { F32 => Inst::xmm_mov_r_m(SseOpcode::Movss, src, addr, srcloc),
F32 => Inst::xmm_mov_r_m(SseOpcode::Movss, src, addr, srcloc), F64 => Inst::xmm_mov_r_m(SseOpcode::Movsd, src, addr, srcloc),
F64 => Inst::xmm_mov_r_m(SseOpcode::Movsd, src, addr, srcloc), _ if elem_ty.is_vector() && elem_ty.bits() == 128 => {
_ => panic!("unexpected type for store {:?}", elem_ty), // TODO Specialize for different types: MOVUPD, MOVDQU, etc.
}); Inst::xmm_mov_r_m(SseOpcode::Movups, src, addr, srcloc)
} else { }
ctx.emit(Inst::mov_r_m(elem_ty.bytes() as u8, src, addr, srcloc)); _ => Inst::mov_r_m(elem_ty.bytes() as u8, src, addr, srcloc),
} });
} }
Opcode::FuncAddr => { Opcode::FuncAddr => {
@@ -1815,6 +1816,17 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
)); ));
} }
Opcode::RawBitcast => {
// A raw_bitcast is just a mechanism for correcting the type of V128 values (see
// https://github.com/bytecodealliance/wasmtime/issues/1147). As such, this IR
// instruction should emit no machine code but a move is necessary to give the register
// allocator a definition for the output virtual register.
let src = input_to_reg(ctx, inputs[0]);
let dst = output_to_reg(ctx, outputs[0]);
let ty = ty.unwrap();
ctx.emit(Inst::gen_move(dst, src, ty));
}
Opcode::IaddImm Opcode::IaddImm
| Opcode::ImulImm | Opcode::ImulImm
| Opcode::UdivImm | Opcode::UdivImm