From 77cc2f69c1736180643d1042a4b2a8558441109d Mon Sep 17 00:00:00 2001 From: Andrew Brown Date: Fri, 24 Jul 2020 12:31:28 -0700 Subject: [PATCH] machinst x64: allow use of vector-length types --- cranelift/codegen/src/isa/x64/abi.rs | 3 +- cranelift/codegen/src/isa/x64/inst/args.rs | 19 ++++++++--- cranelift/codegen/src/isa/x64/inst/emit.rs | 6 ++++ cranelift/codegen/src/isa/x64/inst/mod.rs | 7 +++- cranelift/codegen/src/isa/x64/lower.rs | 38 ++++++++++++++-------- 5 files changed, 54 insertions(+), 19 deletions(-) diff --git a/cranelift/codegen/src/isa/x64/abi.rs b/cranelift/codegen/src/isa/x64/abi.rs index 1989fb8dce..a6e9e1db48 100644 --- a/cranelift/codegen/src/isa/x64/abi.rs +++ b/cranelift/codegen/src/isa/x64/abi.rs @@ -93,6 +93,7 @@ fn in_int_reg(ty: types::Type) -> bool { fn in_vec_reg(ty: types::Type) -> bool { match ty { types::F32 | types::F64 => true, + _ if ty.is_vector() => true, _ => false, } } @@ -365,7 +366,7 @@ impl ABIBody for X64ABIBody { 1 | 8 => Some(ExtMode::BQ), 16 => Some(ExtMode::WQ), 32 => Some(ExtMode::LQ), - 64 => None, + 64 | 128 => None, _ => unreachable!(), }; diff --git a/cranelift/codegen/src/isa/x64/inst/args.rs b/cranelift/codegen/src/isa/x64/inst/args.rs index f469be43c7..634d4eb6ea 100644 --- a/cranelift/codegen/src/isa/x64/inst/args.rs +++ b/cranelift/codegen/src/isa/x64/inst/args.rs @@ -226,7 +226,7 @@ impl ShowWithRRU for RegMemImm { } /// An operand which is either an integer Register or a value in Memory. This can denote an 8, 16, -/// 32 or 64 bit value. +/// 32, 64, or 128 bit value. #[derive(Clone)] pub enum RegMem { Reg { reg: Reg }, @@ -330,8 +330,7 @@ pub(crate) enum InstructionSet { SSE41, } -/// Some scalar SSE operations requiring 2 operands r/m and r. -/// TODO: Below only includes scalar operations. To be seen if packed will be added here. +/// Some SSE operations requiring 2 operands r/m and r. 
#[derive(Clone, Copy, PartialEq)] pub enum SseOpcode { Addss, @@ -365,6 +364,10 @@ pub enum SseOpcode { Movq, Movss, Movsd, + Movups, + Movupd, + Mulps, + Mulpd, Mulss, Mulsd, Orps, @@ -396,9 +399,11 @@ impl SseOpcode { | SseOpcode::Cvttss2si | SseOpcode::Divss | SseOpcode::Maxss - | SseOpcode::Movaps | SseOpcode::Minss + | SseOpcode::Movaps | SseOpcode::Movss + | SseOpcode::Movups + | SseOpcode::Mulps | SseOpcode::Mulss | SseOpcode::Orps | SseOpcode::Rcpss @@ -425,6 +430,8 @@ impl SseOpcode { | SseOpcode::Movd | SseOpcode::Movq | SseOpcode::Movsd + | SseOpcode::Movupd + | SseOpcode::Mulpd | SseOpcode::Mulsd | SseOpcode::Orpd | SseOpcode::Sqrtsd @@ -478,6 +485,10 @@ impl fmt::Debug for SseOpcode { SseOpcode::Movq => "movq", SseOpcode::Movss => "movss", SseOpcode::Movsd => "movsd", + SseOpcode::Movups => "movups", + SseOpcode::Movupd => "movupd", + SseOpcode::Mulps => "mulps", + SseOpcode::Mulpd => "mulpd", SseOpcode::Mulss => "mulss", SseOpcode::Mulsd => "mulsd", SseOpcode::Orpd => "orpd", diff --git a/cranelift/codegen/src/isa/x64/inst/emit.rs b/cranelift/codegen/src/isa/x64/inst/emit.rs index ecfa104549..49cc20acae 100644 --- a/cranelift/codegen/src/isa/x64/inst/emit.rs +++ b/cranelift/codegen/src/isa/x64/inst/emit.rs @@ -1552,6 +1552,10 @@ pub(crate) fn emit( SseOpcode::Movapd => (LegacyPrefix::_66, 0x0F28), SseOpcode::Movsd => (LegacyPrefix::_F2, 0x0F10), SseOpcode::Movss => (LegacyPrefix::_F3, 0x0F10), + SseOpcode::Movups => (LegacyPrefix::None, 0x0F10), + SseOpcode::Movupd => (LegacyPrefix::_66, 0x0F10), + SseOpcode::Sqrtps => (LegacyPrefix::None, 0x0F51), + SseOpcode::Sqrtpd => (LegacyPrefix::_66, 0x0F51), SseOpcode::Sqrtss => (LegacyPrefix::_F3, 0x0F51), SseOpcode::Sqrtsd => (LegacyPrefix::_F2, 0x0F51), SseOpcode::Cvtss2sd => (LegacyPrefix::_F3, 0x0F5A), @@ -1710,6 +1714,8 @@ pub(crate) fn emit( let (prefix, opcode) = match op { SseOpcode::Movss => (LegacyPrefix::_F3, 0x0F11), SseOpcode::Movsd => (LegacyPrefix::_F2, 0x0F11), + SseOpcode::Movaps => 
(LegacyPrefix::None, 0x0F29), + SseOpcode::Movups => (LegacyPrefix::None, 0x0F11), _ => unimplemented!("Opcode {:?} not implemented", op), }; let dst = &dst.finalize(state); diff --git a/cranelift/codegen/src/isa/x64/inst/mod.rs b/cranelift/codegen/src/isa/x64/inst/mod.rs index 9fcabaaf0f..fd5037d295 100644 --- a/cranelift/codegen/src/isa/x64/inst/mod.rs +++ b/cranelift/codegen/src/isa/x64/inst/mod.rs @@ -1921,6 +1921,10 @@ impl MachInst for Inst { RegClass::V128 => match ty { F32 => Inst::xmm_mov(SseOpcode::Movss, RegMem::reg(src_reg), dst_reg, None), F64 => Inst::xmm_mov(SseOpcode::Movsd, RegMem::reg(src_reg), dst_reg, None), + _ if ty.is_vector() && ty.bits() == 128 => { + // TODO Specialize this move for different types: MOVUPD, MOVDQU, etc. + Inst::xmm_mov(SseOpcode::Movups, RegMem::reg(src_reg), dst_reg, None) + } _ => panic!("unexpected type {:?} in gen_move of regclass V128", ty), }, _ => panic!("gen_move(x64): unhandled regclass"), @@ -1942,7 +1946,8 @@ impl MachInst for Inst { fn rc_for_type(ty: Type) -> CodegenResult<RegClass> { match ty { I8 | I16 | I32 | I64 | B1 | B8 | B16 | B32 | B64 | R32 | R64 => Ok(RegClass::I64), - F32 | F64 | I128 | B128 => Ok(RegClass::V128), + F32 | F64 => Ok(RegClass::V128), + _ if ty.bits() == 128 => Ok(RegClass::V128), IFLAGS | FFLAGS => Ok(RegClass::I64), _ => Err(CodegenError::Unsupported(format!( "Unexpected SSA-value type: {}", diff --git a/cranelift/codegen/src/isa/x64/lower.rs b/cranelift/codegen/src/isa/x64/lower.rs index 0230074ffc..dadab97d94 100644 --- a/cranelift/codegen/src/isa/x64/lower.rs +++ b/cranelift/codegen/src/isa/x64/lower.rs @@ -1475,8 +1475,6 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>( _ => false, }; - let is_float = is_float_ty(elem_ty); - let addr = match op { Opcode::Load | Opcode::Uload8 @@ -1513,7 +1511,8 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>( let srcloc = Some(ctx.srcloc(insn)); let dst = output_to_reg(ctx, outputs[0]); - match (sign_extend, is_float) { + let is_xmm = elem_ty.is_float() || elem_ty.is_vector(); + match (sign_extend, 
is_xmm) { (true, false) => { // The load is sign-extended only when the output size is lower than 64 bits, // so ext-mode is defined in this case. @@ -1542,6 +1541,9 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>( ctx.emit(match elem_ty { F32 => Inst::xmm_mov(SseOpcode::Movss, RegMem::mem(addr), dst, srcloc), F64 => Inst::xmm_mov(SseOpcode::Movsd, RegMem::mem(addr), dst, srcloc), + _ if elem_ty.is_vector() && elem_ty.bits() == 128 => { + Inst::xmm_mov(SseOpcode::Movups, RegMem::mem(addr), dst, srcloc) + } // TODO Specialize for different types: MOVUPD, MOVDQU _ => unreachable!("unexpected type for load: {:?}", elem_ty), }); } @@ -1565,7 +1567,6 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>( Opcode::Store | Opcode::StoreComplex => ctx.input_ty(insn, 0), _ => unreachable!(), }; - let is_float = is_float_ty(elem_ty); let addr = match op { Opcode::Store | Opcode::Istore8 | Opcode::Istore16 | Opcode::Istore32 => { @@ -1599,15 +1600,15 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>( let srcloc = Some(ctx.srcloc(insn)); - if is_float { - ctx.emit(match elem_ty { - F32 => Inst::xmm_mov_r_m(SseOpcode::Movss, src, addr, srcloc), - F64 => Inst::xmm_mov_r_m(SseOpcode::Movsd, src, addr, srcloc), - _ => panic!("unexpected type for store {:?}", elem_ty), - }); - } else { - ctx.emit(Inst::mov_r_m(elem_ty.bytes() as u8, src, addr, srcloc)); - } + ctx.emit(match elem_ty { + F32 => Inst::xmm_mov_r_m(SseOpcode::Movss, src, addr, srcloc), + F64 => Inst::xmm_mov_r_m(SseOpcode::Movsd, src, addr, srcloc), + _ if elem_ty.is_vector() && elem_ty.bits() == 128 => { + // TODO Specialize for different types: MOVUPD, MOVDQU, etc. + Inst::xmm_mov_r_m(SseOpcode::Movups, src, addr, srcloc) + } + _ => Inst::mov_r_m(elem_ty.bytes() as u8, src, addr, srcloc), + }); } Opcode::FuncAddr => { @@ -1815,6 +1816,17 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>( )); } + Opcode::RawBitcast => { + // A raw_bitcast is just a mechanism for correcting the type of V128 values (see + // https://github.com/bytecodealliance/wasmtime/issues/1147). 
As such, this IR + // instruction should emit no machine code but a move is necessary to give the register + // allocator a definition for the output virtual register. + let src = input_to_reg(ctx, inputs[0]); + let dst = output_to_reg(ctx, outputs[0]); + let ty = ty.unwrap(); + ctx.emit(Inst::gen_move(dst, src, ty)); + } + Opcode::IaddImm | Opcode::ImulImm | Opcode::UdivImm