diff --git a/cranelift/codegen/src/isa/x64/inst.isle b/cranelift/codegen/src/isa/x64/inst.isle index 954d0c364e..68f3c566a6 100644 --- a/cranelift/codegen/src/isa/x64/inst.isle +++ b/cranelift/codegen/src/isa/x64/inst.isle @@ -283,6 +283,17 @@ (mask Xmm) (dst WritableXmm)) + ;; XMM unary op using a VEX encoding (aka AVX). + (XmmUnaryRmRVex (op AvxOpcode) + (src XmmMem) + (dst WritableXmm)) + + ;; XMM unary op using a VEX encoding (aka AVX) with an immediate. + (XmmUnaryRmRImmVex (op AvxOpcode) + (src XmmMem) + (dst WritableXmm) + (imm u8)) + ;; XMM (scalar or vector) binary op that relies on the EVEX ;; prefix. Takes two inputs. (XmmRmREvex (op Avx512Opcode) @@ -1314,6 +1325,37 @@ Vpsllq Vpsraw Vpsrad + Vpmovsxbw + Vpmovzxbw + Vpmovsxwd + Vpmovzxwd + Vpmovsxdq + Vpmovzxdq + Vaddss + Vaddsd + Vmulss + Vmulsd + Vsubss + Vsubsd + Vdivss + Vdivsd + Vpabsb + Vpabsw + Vpabsd + Vminss + Vminsd + Vmaxss + Vmaxsd + Vsqrtps + Vsqrtpd + Vroundps + Vroundpd + Vcvtdq2pd + Vcvtdq2ps + Vcvtpd2ps + Vcvtps2pd + Vcvttpd2dq + Vcvttps2dq )) (type Avx512Opcode extern @@ -1902,33 +1944,47 @@ (rule (x64_movdqu from) (xmm_unary_rm_r_unaligned (SseOpcode.Movdqu) from)) -(decl x64_movapd (XmmMem) Xmm) -(rule (x64_movapd src) - (xmm_unary_rm_r (SseOpcode.Movapd) src)) - (decl x64_pmovsxbw (XmmMem) Xmm) (rule (x64_pmovsxbw from) (xmm_unary_rm_r_unaligned (SseOpcode.Pmovsxbw) from)) +(rule 1 (x64_pmovsxbw from) + (if-let $true (has_avx)) + (xmm_unary_rm_r_vex (AvxOpcode.Vpmovsxbw) from)) (decl x64_pmovzxbw (XmmMem) Xmm) (rule (x64_pmovzxbw from) (xmm_unary_rm_r_unaligned (SseOpcode.Pmovzxbw) from)) +(rule 1 (x64_pmovzxbw from) + (if-let $true (has_avx)) + (xmm_unary_rm_r_vex (AvxOpcode.Vpmovzxbw) from)) (decl x64_pmovsxwd (XmmMem) Xmm) (rule (x64_pmovsxwd from) (xmm_unary_rm_r_unaligned (SseOpcode.Pmovsxwd) from)) +(rule 1 (x64_pmovsxwd from) + (if-let $true (has_avx)) + (xmm_unary_rm_r_vex (AvxOpcode.Vpmovsxwd) from)) (decl x64_pmovzxwd (XmmMem) Xmm) (rule (x64_pmovzxwd from) (xmm_unary_rm_r_unaligned (SseOpcode.Pmovzxwd) from)) +(rule 1 (x64_pmovzxwd from) + (if-let $true (has_avx)) + (xmm_unary_rm_r_vex (AvxOpcode.Vpmovzxwd) from)) (decl x64_pmovsxdq (XmmMem) Xmm) (rule (x64_pmovsxdq from) (xmm_unary_rm_r_unaligned (SseOpcode.Pmovsxdq) from)) +(rule 1 (x64_pmovsxdq from) + (if-let $true (has_avx)) + (xmm_unary_rm_r_vex (AvxOpcode.Vpmovsxdq) from)) (decl x64_pmovzxdq (XmmMem) Xmm) (rule (x64_pmovzxdq from) (xmm_unary_rm_r_unaligned (SseOpcode.Pmovzxdq) from)) +(rule 1 (x64_pmovzxdq from) + (if-let $true (has_avx)) + (xmm_unary_rm_r_vex (AvxOpcode.Vpmovzxdq) from)) (decl x64_movrm (Type SyntheticAmode Gpr) SideEffectNoResult) (rule (x64_movrm ty addr data) @@ -2702,11 +2758,17 @@ (decl x64_addss (Xmm XmmMem) Xmm) (rule (x64_addss src1 src2) (xmm_rm_r_unaligned (SseOpcode.Addss) src1 src2)) +(rule 1 (x64_addss src1 src2) + (if-let $true (has_avx)) + (xmm_rmir_vex (AvxOpcode.Vaddss) src1 src2)) ;; Helper for creating `addsd` instructions. (decl x64_addsd (Xmm XmmMem) Xmm) (rule (x64_addsd src1 src2) (xmm_rm_r_unaligned (SseOpcode.Addsd) src1 src2)) +(rule 1 (x64_addsd src1 src2) + (if-let $true (has_avx)) + (xmm_rmir_vex (AvxOpcode.Vaddsd) src1 src2)) ;; Helper for creating `addps` instructions. (decl x64_addps (Xmm XmmMem) Xmm) @@ -2728,11 +2790,17 @@ (decl x64_subss (Xmm XmmMem) Xmm) (rule (x64_subss src1 src2) (xmm_rm_r_unaligned (SseOpcode.Subss) src1 src2)) +(rule 1 (x64_subss src1 src2) + (if-let $true (has_avx)) + (xmm_rmir_vex (AvxOpcode.Vsubss) src1 src2)) ;; Helper for creating `subsd` instructions. 
(decl x64_subsd (Xmm XmmMem) Xmm) (rule (x64_subsd src1 src2) (xmm_rm_r_unaligned (SseOpcode.Subsd) src1 src2)) +(rule 1 (x64_subsd src1 src2) + (if-let $true (has_avx)) + (xmm_rmir_vex (AvxOpcode.Vsubsd) src1 src2)) ;; Helper for creating `subps` instructions. (decl x64_subps (Xmm XmmMem) Xmm) @@ -2754,11 +2822,17 @@ (decl x64_mulss (Xmm XmmMem) Xmm) (rule (x64_mulss src1 src2) (xmm_rm_r_unaligned (SseOpcode.Mulss) src1 src2)) +(rule 1 (x64_mulss src1 src2) + (if-let $true (has_avx)) + (xmm_rmir_vex (AvxOpcode.Vmulss) src1 src2)) ;; Helper for creating `mulsd` instructions. (decl x64_mulsd (Xmm XmmMem) Xmm) (rule (x64_mulsd src1 src2) (xmm_rm_r_unaligned (SseOpcode.Mulsd) src1 src2)) +(rule 1 (x64_mulsd src1 src2) + (if-let $true (has_avx)) + (xmm_rmir_vex (AvxOpcode.Vmulsd) src1 src2)) ;; Helper for creating `mulps` instructions. (decl x64_mulps (Xmm XmmMem) Xmm) @@ -2780,11 +2854,17 @@ (decl x64_divss (Xmm XmmMem) Xmm) (rule (x64_divss src1 src2) (xmm_rm_r_unaligned (SseOpcode.Divss) src1 src2)) +(rule 1 (x64_divss src1 src2) + (if-let $true (has_avx)) + (xmm_rmir_vex (AvxOpcode.Vdivss) src1 src2)) ;; Helper for creating `divsd` instructions. (decl x64_divsd (Xmm XmmMem) Xmm) (rule (x64_divsd src1 src2) (xmm_rm_r_unaligned (SseOpcode.Divsd) src1 src2)) +(rule 1 (x64_divsd src1 src2) + (if-let $true (has_avx)) + (xmm_rmir_vex (AvxOpcode.Vdivsd) src1 src2)) ;; Helper for creating `divps` instructions. (decl x64_divps (Xmm XmmMem) Xmm) @@ -2816,6 +2896,20 @@ (_ Unit (emit (MInst.XmmRmRBlendVex op src1 src2 mask dst)))) dst)) +;; Helper for creating `XmmUnaryRmRVex` instructions +(decl xmm_unary_rm_r_vex (AvxOpcode XmmMem) Xmm) +(rule (xmm_unary_rm_r_vex op src) + (let ((dst WritableXmm (temp_writable_xmm)) + (_ Unit (emit (MInst.XmmUnaryRmRVex op src dst)))) + dst)) + +;; Helper for creating `XmmUnaryRmRImmVex` instructions +(decl xmm_unary_rm_r_imm_vex (AvxOpcode XmmMem u8) Xmm) +(rule (xmm_unary_rm_r_imm_vex op src imm) + (let ((dst WritableXmm (temp_writable_xmm)) + (_ Unit (emit (MInst.XmmUnaryRmRImmVex op src dst imm)))) + dst)) + ;; Helper for creating `blendvp{d,s}` and `pblendvb` instructions. (decl x64_blend (Type Xmm XmmMem Xmm) Xmm) (rule 1 (x64_blend $F32X4 mask src1 src2) (x64_blendvps src2 src1 mask)) @@ -3131,11 +3225,17 @@ (decl x64_roundps (XmmMem RoundImm) Xmm) (rule (x64_roundps src1 round) (xmm_unary_rm_r_imm (SseOpcode.Roundps) src1 (encode_round_imm round))) +(rule 1 (x64_roundps src1 round) + (if-let $true (has_avx)) + (xmm_unary_rm_r_imm_vex (AvxOpcode.Vroundps) src1 (encode_round_imm round))) ;; Helper for creating `roundpd` instructions. (decl x64_roundpd (XmmMem RoundImm) Xmm) (rule (x64_roundpd src1 round) (xmm_unary_rm_r_imm (SseOpcode.Roundpd) src1 (encode_round_imm round))) +(rule 1 (x64_roundpd src1 round) + (if-let $true (has_avx)) + (xmm_unary_rm_r_imm_vex (AvxOpcode.Vroundpd) src1 (encode_round_imm round))) ;; Helper for creating `pmaddwd` instructions. (decl x64_pmaddwd (Xmm XmmMem) Xmm) @@ -3207,16 +3307,25 @@ (decl x64_pabsb (XmmMem) Xmm) (rule (x64_pabsb src) (xmm_unary_rm_r (SseOpcode.Pabsb) src)) +(rule 1 (x64_pabsb src) + (if-let $true (has_avx)) + (xmm_unary_rm_r_vex (AvxOpcode.Vpabsb) src)) ;; Helper for creating `pabsw` instructions. (decl x64_pabsw (XmmMem) Xmm) (rule (x64_pabsw src) (xmm_unary_rm_r (SseOpcode.Pabsw) src)) +(rule 1 (x64_pabsw src) + (if-let $true (has_avx)) + (xmm_unary_rm_r_vex (AvxOpcode.Vpabsw) src)) ;; Helper for creating `pabsd` instructions. 
(decl x64_pabsd (XmmMem) Xmm) (rule (x64_pabsd src) (xmm_unary_rm_r (SseOpcode.Pabsd) src)) +(rule 1 (x64_pabsd src) + (if-let $true (has_avx)) + (xmm_unary_rm_r_vex (AvxOpcode.Vpabsd) src)) ;; Helper for creating `MInst.XmmUnaryRmREvex` instructions. (decl xmm_unary_rm_r_evex (Avx512Opcode XmmMem) Xmm) @@ -3540,11 +3649,17 @@ (decl x64_minss (Xmm XmmMem) Xmm) (rule (x64_minss x y) (xmm_rm_r_unaligned (SseOpcode.Minss) x y)) +(rule 1 (x64_minss x y) + (if-let $true (has_avx)) + (xmm_rmir_vex (AvxOpcode.Vminss) x y)) ;; Helper for creating `minsd` instructions. (decl x64_minsd (Xmm XmmMem) Xmm) (rule (x64_minsd x y) (xmm_rm_r_unaligned (SseOpcode.Minsd) x y)) +(rule 1 (x64_minsd x y) + (if-let $true (has_avx)) + (xmm_rmir_vex (AvxOpcode.Vminsd) x y)) ;; Helper for creating `minps` instructions. (decl x64_minps (Xmm XmmMem) Xmm) @@ -3566,11 +3681,17 @@ (decl x64_maxss (Xmm XmmMem) Xmm) (rule (x64_maxss x y) (xmm_rm_r_unaligned (SseOpcode.Maxss) x y)) +(rule 1 (x64_maxss x y) + (if-let $true (has_avx)) + (xmm_rmir_vex (AvxOpcode.Vmaxss) x y)) ;; Helper for creating `maxsd` instructions. (decl x64_maxsd (Xmm XmmMem) Xmm) (rule (x64_maxsd x y) (xmm_rm_r_unaligned (SseOpcode.Maxsd) x y)) +(rule 1 (x64_maxsd x y) + (if-let $true (has_avx)) + (xmm_rmir_vex (AvxOpcode.Vmaxsd) x y)) ;; Helper for creating `maxps` instructions. (decl x64_maxps (Xmm XmmMem) Xmm) @@ -3649,10 +3770,16 @@ ;; Helper for creating `sqrtps` instructions. (decl x64_sqrtps (XmmMem) Xmm) (rule (x64_sqrtps x) (xmm_unary_rm_r (SseOpcode.Sqrtps) x)) +(rule 1 (x64_sqrtps x) + (if-let $true (has_avx)) + (xmm_unary_rm_r_vex (AvxOpcode.Vsqrtps) x)) ;; Helper for creating `sqrtpd` instructions. (decl x64_sqrtpd (XmmMem) Xmm) (rule (x64_sqrtpd x) (xmm_unary_rm_r (SseOpcode.Sqrtpd) x)) +(rule 1 (x64_sqrtpd x) + (if-let $true (has_avx)) + (xmm_unary_rm_r_vex (AvxOpcode.Vsqrtpd) x)) ;; Helper for creating `cvtss2sd` instructions. (decl x64_cvtss2sd (Xmm) Xmm) @@ -3665,18 +3792,30 @@ ;; Helper for creating `cvtdq2ps` instructions. (decl x64_cvtdq2ps (XmmMem) Xmm) (rule (x64_cvtdq2ps x) (xmm_unary_rm_r (SseOpcode.Cvtdq2ps) x)) +(rule 1 (x64_cvtdq2ps x) + (if-let $true (has_avx)) + (xmm_unary_rm_r_vex (AvxOpcode.Vcvtdq2ps) x)) ;; Helper for creating `cvtps2pd` instructions. (decl x64_cvtps2pd (XmmMem) Xmm) (rule (x64_cvtps2pd x) (xmm_unary_rm_r (SseOpcode.Cvtps2pd) x)) +(rule 1 (x64_cvtps2pd x) + (if-let $true (has_avx)) + (xmm_unary_rm_r_vex (AvxOpcode.Vcvtps2pd) x)) ;; Helper for creating `cvtpd2ps` instructions. (decl x64_cvtpd2ps (XmmMem) Xmm) (rule (x64_cvtpd2ps x) (xmm_unary_rm_r (SseOpcode.Cvtpd2ps) x)) +(rule 1 (x64_cvtpd2ps x) + (if-let $true (has_avx)) + (xmm_unary_rm_r_vex (AvxOpcode.Vcvtpd2ps) x)) ;; Helper for creating `cvtdq2pd` instructions. (decl x64_cvtdq2pd (XmmMem) Xmm) (rule (x64_cvtdq2pd x) (xmm_unary_rm_r (SseOpcode.Cvtdq2pd) x)) +(rule 1 (x64_cvtdq2pd x) + (if-let $true (has_avx)) + (xmm_unary_rm_r_vex (AvxOpcode.Vcvtdq2pd) x)) ;; Helper for creating `cvtsi2ss` instructions. (decl x64_cvtsi2ss (Type GprMem) Xmm) @@ -3692,11 +3831,17 @@ (decl x64_cvttps2dq (XmmMem) Xmm) (rule (x64_cvttps2dq x) (xmm_unary_rm_r (SseOpcode.Cvttps2dq) x)) +(rule 1 (x64_cvttps2dq x) + (if-let $true (has_avx)) + (xmm_unary_rm_r_vex (AvxOpcode.Vcvttps2dq) x)) ;; Helper for creating `cvttpd2dq` instructions. 
(decl x64_cvttpd2dq (XmmMem) Xmm) (rule (x64_cvttpd2dq x) (xmm_unary_rm_r (SseOpcode.Cvttpd2dq) x)) +(rule 1 (x64_cvttpd2dq x) + (if-let $true (has_avx)) + (xmm_unary_rm_r_vex (AvxOpcode.Vcvttpd2dq) x)) (decl cvt_u64_to_float_seq (Type Gpr) Xmm) (rule (cvt_u64_to_float_seq ty src) diff --git a/cranelift/codegen/src/isa/x64/inst/args.rs b/cranelift/codegen/src/isa/x64/inst/args.rs index b85fdfc20e..772727133b 100644 --- a/cranelift/codegen/src/isa/x64/inst/args.rs +++ b/cranelift/codegen/src/isa/x64/inst/args.rs @@ -1630,7 +1630,38 @@ impl AvxOpcode { | AvxOpcode::Vpslld | AvxOpcode::Vpsllq | AvxOpcode::Vpsraw - | AvxOpcode::Vpsrad => { + | AvxOpcode::Vpsrad + | AvxOpcode::Vpmovsxbw + | AvxOpcode::Vpmovzxbw + | AvxOpcode::Vpmovsxwd + | AvxOpcode::Vpmovzxwd + | AvxOpcode::Vpmovsxdq + | AvxOpcode::Vpmovzxdq + | AvxOpcode::Vaddss + | AvxOpcode::Vaddsd + | AvxOpcode::Vmulss + | AvxOpcode::Vmulsd + | AvxOpcode::Vsubss + | AvxOpcode::Vsubsd + | AvxOpcode::Vdivss + | AvxOpcode::Vdivsd + | AvxOpcode::Vpabsb + | AvxOpcode::Vpabsw + | AvxOpcode::Vpabsd + | AvxOpcode::Vminss + | AvxOpcode::Vminsd + | AvxOpcode::Vmaxss + | AvxOpcode::Vmaxsd + | AvxOpcode::Vsqrtps + | AvxOpcode::Vsqrtpd + | AvxOpcode::Vroundpd + | AvxOpcode::Vroundps + | AvxOpcode::Vcvtdq2pd + | AvxOpcode::Vcvtdq2ps + | AvxOpcode::Vcvtpd2ps + | AvxOpcode::Vcvtps2pd + | AvxOpcode::Vcvttpd2dq + | AvxOpcode::Vcvttps2dq => { smallvec![InstructionSet::AVX] } } diff --git a/cranelift/codegen/src/isa/x64/inst/emit.rs b/cranelift/codegen/src/isa/x64/inst/emit.rs index 44de9450f8..3674c6c295 100644 --- a/cranelift/codegen/src/isa/x64/inst/emit.rs +++ b/cranelift/codegen/src/isa/x64/inst/emit.rs @@ -2182,6 +2182,18 @@ pub(crate) fn emit( AvxOpcode::Vpsllq => (LP::_66, OM::_0F, 0xF3), AvxOpcode::Vpsraw => (LP::_66, OM::_0F, 0xE1), AvxOpcode::Vpsrad => (LP::_66, OM::_0F, 0xE2), + AvxOpcode::Vaddss => (LP::_F3, OM::_0F, 0x58), + AvxOpcode::Vaddsd => (LP::_F2, OM::_0F, 0x58), + AvxOpcode::Vmulss => (LP::_F3, OM::_0F, 0x59), + AvxOpcode::Vmulsd => (LP::_F2, OM::_0F, 0x59), + AvxOpcode::Vsubss => (LP::_F3, OM::_0F, 0x5C), + AvxOpcode::Vsubsd => (LP::_F2, OM::_0F, 0x5C), + AvxOpcode::Vdivss => (LP::_F3, OM::_0F, 0x5E), + AvxOpcode::Vdivsd => (LP::_F2, OM::_0F, 0x5E), + AvxOpcode::Vminss => (LP::_F3, OM::_0F, 0x5D), + AvxOpcode::Vminsd => (LP::_F2, OM::_0F, 0x5D), + AvxOpcode::Vmaxss => (LP::_F3, OM::_0F, 0x5F), + AvxOpcode::Vmaxsd => (LP::_F2, OM::_0F, 0x5F), _ => panic!("unexpected rmir vex opcode {op:?}"), }; VexInstruction::new() @@ -2359,6 +2371,72 @@ pub(crate) fn emit( .encode(sink); } + Inst::XmmUnaryRmRVex { op, src, dst } => { + let dst = allocs.next(dst.to_reg().to_reg()); + let src = match src.clone().to_reg_mem().with_allocs(allocs) { + RegMem::Reg { reg } => { + RegisterOrAmode::Register(reg.to_real_reg().unwrap().hw_enc().into()) + } + RegMem::Mem { addr } => RegisterOrAmode::Amode(addr.finalize(state, sink)), + }; + + let (prefix, map, opcode) = match op { + AvxOpcode::Vpmovsxbw => (LegacyPrefixes::_66, OpcodeMap::_0F38, 0x20), + AvxOpcode::Vpmovzxbw => (LegacyPrefixes::_66, OpcodeMap::_0F38, 0x30), + AvxOpcode::Vpmovsxwd => (LegacyPrefixes::_66, OpcodeMap::_0F38, 0x23), + AvxOpcode::Vpmovzxwd => (LegacyPrefixes::_66, OpcodeMap::_0F38, 0x33), + AvxOpcode::Vpmovsxdq => (LegacyPrefixes::_66, OpcodeMap::_0F38, 0x25), + AvxOpcode::Vpmovzxdq => (LegacyPrefixes::_66, OpcodeMap::_0F38, 0x35), + AvxOpcode::Vpabsb => (LegacyPrefixes::_66, OpcodeMap::_0F38, 0x1C), + AvxOpcode::Vpabsw => (LegacyPrefixes::_66, OpcodeMap::_0F38, 0x1D), + 
AvxOpcode::Vpabsd => (LegacyPrefixes::_66, OpcodeMap::_0F38, 0x1E), + AvxOpcode::Vsqrtps => (LegacyPrefixes::None, OpcodeMap::_0F, 0x51), + AvxOpcode::Vsqrtpd => (LegacyPrefixes::_66, OpcodeMap::_0F, 0x51), + AvxOpcode::Vcvtdq2pd => (LegacyPrefixes::_F3, OpcodeMap::_0F, 0xE6), + AvxOpcode::Vcvtdq2ps => (LegacyPrefixes::None, OpcodeMap::_0F, 0x5B), + AvxOpcode::Vcvtpd2ps => (LegacyPrefixes::_66, OpcodeMap::_0F, 0x5A), + AvxOpcode::Vcvtps2pd => (LegacyPrefixes::None, OpcodeMap::_0F, 0x5A), + AvxOpcode::Vcvttpd2dq => (LegacyPrefixes::_66, OpcodeMap::_0F, 0xE6), + AvxOpcode::Vcvttps2dq => (LegacyPrefixes::_F3, OpcodeMap::_0F, 0x5B), + _ => panic!("unexpected unary vex opcode {op:?}"), + }; + + VexInstruction::new() + .length(VexVectorLength::V128) + .prefix(prefix) + .map(map) + .opcode(opcode) + .reg(dst.to_real_reg().unwrap().hw_enc()) + .rm(src) + .encode(sink); + } + + Inst::XmmUnaryRmRImmVex { op, src, dst, imm } => { + let dst = allocs.next(dst.to_reg().to_reg()); + let src = match src.clone().to_reg_mem().with_allocs(allocs) { + RegMem::Reg { reg } => { + RegisterOrAmode::Register(reg.to_real_reg().unwrap().hw_enc().into()) + } + RegMem::Mem { addr } => RegisterOrAmode::Amode(addr.finalize(state, sink)), + }; + + let (prefix, map, opcode) = match op { + AvxOpcode::Vroundps => (LegacyPrefixes::_66, OpcodeMap::_0F3A, 0x08), + AvxOpcode::Vroundpd => (LegacyPrefixes::_66, OpcodeMap::_0F3A, 0x09), + _ => panic!("unexpected rmr_imm_vex opcode {op:?}"), + }; + + VexInstruction::new() + .length(VexVectorLength::V128) + .prefix(prefix) + .map(map) + .opcode(opcode) + .reg(dst.to_real_reg().unwrap().hw_enc()) + .rm(src) + .imm(*imm) + .encode(sink); + } + Inst::XmmRmREvex { op, src1, diff --git a/cranelift/codegen/src/isa/x64/inst/mod.rs b/cranelift/codegen/src/isa/x64/inst/mod.rs index 1a5ca83548..4e15b2aea4 100644 --- a/cranelift/codegen/src/isa/x64/inst/mod.rs +++ b/cranelift/codegen/src/isa/x64/inst/mod.rs @@ -151,7 +151,9 @@ impl Inst { | Inst::XmmRmRVex3 { op, .. } | Inst::XmmRmRImmVex { op, .. } | Inst::XmmRmRBlendVex { op, .. } - | Inst::XmmVexPinsr { op, .. } => op.available_from(), + | Inst::XmmVexPinsr { op, .. } + | Inst::XmmUnaryRmRVex { op, .. } + | Inst::XmmUnaryRmRImmVex { op, .. } => op.available_from(), } } } @@ -910,6 +912,20 @@ impl PrettyPrint for Inst { format!("{} ${}, {}, {}", ljustify(op.to_string()), imm, src, dst) } + Inst::XmmUnaryRmRVex { op, src, dst, .. } => { + let dst = pretty_print_reg(dst.to_reg().to_reg(), 8, allocs); + let src = src.pretty_print(8, allocs); + format!("{} {}, {}", ljustify(op.to_string()), src, dst) + } + + Inst::XmmUnaryRmRImmVex { + op, src, dst, imm, .. + } => { + let dst = pretty_print_reg(dst.to_reg().to_reg(), 8, allocs); + let src = src.pretty_print(8, allocs); + format!("{} ${imm}, {}, {}", ljustify(op.to_string()), src, dst) + } + Inst::XmmUnaryRmREvex { op, src, dst, .. } => { let dst = pretty_print_reg(dst.to_reg().to_reg(), 8, allocs); let src = src.pretty_print(8, allocs); @@ -1887,7 +1903,10 @@ fn x64_get_operands<F: Fn(VReg) -> VReg>(inst: &Inst, collector: &mut OperandCol collector.reg_def(dst.to_writable_reg()); src.get_operands(collector); } - Inst::XmmUnaryRmREvex { src, dst, .. } | Inst::XmmUnaryRmRUnaligned { src, dst, .. } => { + Inst::XmmUnaryRmREvex { src, dst, .. } + | Inst::XmmUnaryRmRUnaligned { src, dst, .. } + | Inst::XmmUnaryRmRVex { src, dst, .. } + | Inst::XmmUnaryRmRImmVex { src, dst, ..
} => { collector.reg_def(dst.to_writable_reg()); src.get_operands(collector); } diff --git a/cranelift/filetests/filetests/isa/x64/float-avx.clif b/cranelift/filetests/filetests/isa/x64/float-avx.clif new file mode 100644 index 0000000000..8626b34757 --- /dev/null +++ b/cranelift/filetests/filetests/isa/x64/float-avx.clif @@ -0,0 +1,598 @@ +test compile precise-output +set enable_simd +target x86_64 has_avx + +function %f32_add(f32, f32) -> f32 { +block0(v0: f32, v1: f32): + v2 = fadd v0, v1 + return v2 +} + +; VCode: +; pushq %rbp +; movq %rsp, %rbp +; block0: +; vaddss %xmm0, %xmm1, %xmm0 +; movq %rbp, %rsp +; popq %rbp +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; pushq %rbp +; movq %rsp, %rbp +; block1: ; offset 0x4 +; vaddss %xmm1, %xmm0, %xmm0 +; movq %rbp, %rsp +; popq %rbp +; retq + +function %f64_add(f64, f64) -> f64 { +block0(v0: f64, v1: f64): + v2 = fadd v0, v1 + return v2 +} + +; VCode: +; pushq %rbp +; movq %rsp, %rbp +; block0: +; vaddsd %xmm0, %xmm1, %xmm0 +; movq %rbp, %rsp +; popq %rbp +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; pushq %rbp +; movq %rsp, %rbp +; block1: ; offset 0x4 +; vaddsd %xmm1, %xmm0, %xmm0 +; movq %rbp, %rsp +; popq %rbp +; retq + +function %f32_sub(f32, f32) -> f32 { +block0(v0: f32, v1: f32): + v2 = fsub v0, v1 + return v2 +} + +; VCode: +; pushq %rbp +; movq %rsp, %rbp +; block0: +; vsubss %xmm0, %xmm1, %xmm0 +; movq %rbp, %rsp +; popq %rbp +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; pushq %rbp +; movq %rsp, %rbp +; block1: ; offset 0x4 +; vsubss %xmm1, %xmm0, %xmm0 +; movq %rbp, %rsp +; popq %rbp +; retq + +function %f64_sub(f64, f64) -> f64 { +block0(v0: f64, v1: f64): + v2 = fsub v0, v1 + return v2 +} + +; VCode: +; pushq %rbp +; movq %rsp, %rbp +; block0: +; vsubsd %xmm0, %xmm1, %xmm0 +; movq %rbp, %rsp +; popq %rbp +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; pushq %rbp +; movq %rsp, %rbp +; block1: ; offset 0x4 +; vsubsd %xmm1, %xmm0, %xmm0 +; movq %rbp, %rsp +; popq %rbp +; retq + +function %f32_mul(f32, f32) -> f32 { +block0(v0: f32, v1: f32): + v2 = fmul v0, v1 + return v2 +} + +; VCode: +; pushq %rbp +; movq %rsp, %rbp +; block0: +; vmulss %xmm0, %xmm1, %xmm0 +; movq %rbp, %rsp +; popq %rbp +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; pushq %rbp +; movq %rsp, %rbp +; block1: ; offset 0x4 +; vmulss %xmm1, %xmm0, %xmm0 +; movq %rbp, %rsp +; popq %rbp +; retq + +function %f64_mul(f64, f64) -> f64 { +block0(v0: f64, v1: f64): + v2 = fmul v0, v1 + return v2 +} + +; VCode: +; pushq %rbp +; movq %rsp, %rbp +; block0: +; vmulsd %xmm0, %xmm1, %xmm0 +; movq %rbp, %rsp +; popq %rbp +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; pushq %rbp +; movq %rsp, %rbp +; block1: ; offset 0x4 +; vmulsd %xmm1, %xmm0, %xmm0 +; movq %rbp, %rsp +; popq %rbp +; retq + +function %f32_div(f32, f32) -> f32 { +block0(v0: f32, v1: f32): + v2 = fdiv v0, v1 + return v2 +} + +; VCode: +; pushq %rbp +; movq %rsp, %rbp +; block0: +; vdivss %xmm0, %xmm1, %xmm0 +; movq %rbp, %rsp +; popq %rbp +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; pushq %rbp +; movq %rsp, %rbp +; block1: ; offset 0x4 +; vdivss %xmm1, %xmm0, %xmm0 +; movq %rbp, %rsp +; popq %rbp +; retq + +function %f64_div(f64, f64) -> f64 { +block0(v0: f64, v1: f64): + v2 = fdiv v0, v1 + return v2 +} + +; VCode: +; pushq %rbp +; movq %rsp, %rbp +; block0: +; vdivsd %xmm0, %xmm1, %xmm0 +; movq %rbp, %rsp +; popq %rbp +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; pushq %rbp +; movq %rsp, %rbp +; block1: ; offset 0x4 +; vdivsd %xmm1, %xmm0, %xmm0 +; movq 
%rbp, %rsp +; popq %rbp +; retq + +function %f32_min(f32, f32) -> f32 { +block0(v0: f32, v1: f32): + v2 = fmin_pseudo v0, v1 + return v2 +} + +; VCode: +; pushq %rbp +; movq %rsp, %rbp +; block0: +; vminss %xmm1, %xmm0, %xmm0 +; movq %rbp, %rsp +; popq %rbp +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; pushq %rbp +; movq %rsp, %rbp +; block1: ; offset 0x4 +; vminss %xmm0, %xmm1, %xmm0 +; movq %rbp, %rsp +; popq %rbp +; retq + +function %f64_min(f64, f64) -> f64 { +block0(v0: f64, v1: f64): + v2 = fmin_pseudo v0, v1 + return v2 +} + +; VCode: +; pushq %rbp +; movq %rsp, %rbp +; block0: +; vminsd %xmm1, %xmm0, %xmm0 +; movq %rbp, %rsp +; popq %rbp +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; pushq %rbp +; movq %rsp, %rbp +; block1: ; offset 0x4 +; vminsd %xmm0, %xmm1, %xmm0 +; movq %rbp, %rsp +; popq %rbp +; retq + +function %f32_max(f32, f32) -> f32 { +block0(v0: f32, v1: f32): + v2 = fmax_pseudo v0, v1 + return v2 +} + +; VCode: +; pushq %rbp +; movq %rsp, %rbp +; block0: +; vmaxss %xmm1, %xmm0, %xmm0 +; movq %rbp, %rsp +; popq %rbp +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; pushq %rbp +; movq %rsp, %rbp +; block1: ; offset 0x4 +; vmaxss %xmm0, %xmm1, %xmm0 +; movq %rbp, %rsp +; popq %rbp +; retq + +function %f64_max(f64, f64) -> f64 { +block0(v0: f64, v1: f64): + v2 = fmax_pseudo v0, v1 + return v2 +} + +; VCode: +; pushq %rbp +; movq %rsp, %rbp +; block0: +; vmaxsd %xmm1, %xmm0, %xmm0 +; movq %rbp, %rsp +; popq %rbp +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; pushq %rbp +; movq %rsp, %rbp +; block1: ; offset 0x4 +; vmaxsd %xmm0, %xmm1, %xmm0 +; movq %rbp, %rsp +; popq %rbp +; retq + +function %f32x4_sqrt(f32x4) -> f32x4 { +block0(v0: f32x4): + v1 = sqrt v0 + return v1 +} + +; VCode: +; pushq %rbp +; movq %rsp, %rbp +; block0: +; vsqrtps %xmm0, %xmm0 +; movq %rbp, %rsp +; popq %rbp +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; pushq %rbp +; movq %rsp, %rbp +; block1: ; offset 0x4 +; vsqrtps %xmm0, %xmm0 +; movq %rbp, %rsp +; popq %rbp +; retq + +function %f64x2_sqrt(f64x2) -> f64x2 { +block0(v0: f64x2): + v1 = sqrt v0 + return v1 +} + +; VCode: +; pushq %rbp +; movq %rsp, %rbp +; block0: +; vsqrtpd %xmm0, %xmm0 +; movq %rbp, %rsp +; popq %rbp +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; pushq %rbp +; movq %rsp, %rbp +; block1: ; offset 0x4 +; vsqrtpd %xmm0, %xmm0 +; movq %rbp, %rsp +; popq %rbp +; retq + +function %f32x4_floor(f32x4) -> f32x4 { +block0(v0: f32x4): + v1 = floor v0 + return v1 +} + +; VCode: +; pushq %rbp +; movq %rsp, %rbp +; block0: +; vroundps $1, %xmm0, %xmm0 +; movq %rbp, %rsp +; popq %rbp +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; pushq %rbp +; movq %rsp, %rbp +; block1: ; offset 0x4 +; vroundps $1, %xmm0, %xmm0 +; movq %rbp, %rsp +; popq %rbp +; retq + +function %f64x2_floor(f64x2) -> f64x2 { +block0(v0: f64x2): + v1 = floor v0 + return v1 +} + +; VCode: +; pushq %rbp +; movq %rsp, %rbp +; block0: +; vroundpd $1, %xmm0, %xmm0 +; movq %rbp, %rsp +; popq %rbp +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; pushq %rbp +; movq %rsp, %rbp +; block1: ; offset 0x4 +; vroundpd $1, %xmm0, %xmm0 +; movq %rbp, %rsp +; popq %rbp +; retq + +function %fcvt_low_from_sint(i32x4) -> f64x2 { +block0(v0: i32x4): + v1 = fcvt_low_from_sint.f64x2 v0 + return v1 +} + +; VCode: +; pushq %rbp +; movq %rsp, %rbp +; block0: +; vcvtdq2pd %xmm0, %xmm0 +; movq %rbp, %rsp +; popq %rbp +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; pushq %rbp +; movq %rsp, %rbp +; block1: ; offset 0x4 +; vcvtdq2pd %xmm0, %xmm0 +; movq 
%rbp, %rsp +; popq %rbp +; retq + +function %fcvt_from_uint(i32x4) -> f32x4 { +block0(v0: i32x4): + v1 = fcvt_from_uint.f32x4 v0 + return v1 +} + +; VCode: +; pushq %rbp +; movq %rsp, %rbp +; block0: +; vpslld %xmm0, $16, %xmm2 +; vpsrld %xmm2, $16, %xmm4 +; vpsubd %xmm0, %xmm4, %xmm6 +; vcvtdq2ps %xmm4, %xmm8 +; vpsrld %xmm6, $1, %xmm10 +; vcvtdq2ps %xmm10, %xmm12 +; vaddps %xmm12, %xmm12, %xmm14 +; vaddps %xmm14, %xmm8, %xmm0 +; movq %rbp, %rsp +; popq %rbp +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; pushq %rbp +; movq %rsp, %rbp +; block1: ; offset 0x4 +; vpslld $0x10, %xmm0, %xmm2 +; vpsrld $0x10, %xmm2, %xmm4 +; vpsubd %xmm4, %xmm0, %xmm6 +; vcvtdq2ps %xmm4, %xmm8 +; vpsrld $1, %xmm6, %xmm10 +; vcvtdq2ps %xmm10, %xmm12 +; vaddps %xmm12, %xmm12, %xmm14 +; vaddps %xmm8, %xmm14, %xmm0 +; movq %rbp, %rsp +; popq %rbp +; retq + +function %fvdemote(f64x2) -> f32x4 { +block0(v0: f64x2): + v1 = fvdemote v0 + return v1 +} + +; VCode: +; pushq %rbp +; movq %rsp, %rbp +; block0: +; vcvtpd2ps %xmm0, %xmm0 +; movq %rbp, %rsp +; popq %rbp +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; pushq %rbp +; movq %rsp, %rbp +; block1: ; offset 0x4 +; vcvtpd2ps %xmm0, %xmm0 +; movq %rbp, %rsp +; popq %rbp +; retq + +function %fvpromote_low(f32x4) -> f64x2 { +block0(v0: f32x4): + v1 = fvpromote_low v0 + return v1 +} + +; VCode: +; pushq %rbp +; movq %rsp, %rbp +; block0: +; vcvtps2pd %xmm0, %xmm0 +; movq %rbp, %rsp +; popq %rbp +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; pushq %rbp +; movq %rsp, %rbp +; block1: ; offset 0x4 +; vcvtps2pd %xmm0, %xmm0 +; movq %rbp, %rsp +; popq %rbp +; retq + +function %fcvt_to_sint_sat(f32x4) -> i32x4 { +block0(v0: f32x4): + v1 = fcvt_to_sint_sat.i32x4 v0 + return v1 +} + +; VCode: +; pushq %rbp +; movq %rsp, %rbp +; block0: +; vcmpps $0 %xmm0, %xmm0, %xmm2 +; vandps %xmm0, %xmm2, %xmm4 +; vpxor %xmm2, %xmm4, %xmm6 +; vcvttps2dq %xmm4, %xmm8 +; vpand %xmm8, %xmm6, %xmm10 +; vpsrad %xmm10, $31, %xmm12 +; vpxor %xmm12, %xmm8, %xmm0 +; movq %rbp, %rsp +; popq %rbp +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; pushq %rbp +; movq %rsp, %rbp +; block1: ; offset 0x4 +; vcmpeqps %xmm0, %xmm0, %xmm2 +; vandps %xmm2, %xmm0, %xmm4 +; vpxor %xmm4, %xmm2, %xmm6 +; vcvttps2dq %xmm4, %xmm8 +; vpand %xmm6, %xmm8, %xmm10 +; vpsrad $0x1f, %xmm10, %xmm12 +; vpxor %xmm8, %xmm12, %xmm0 +; movq %rbp, %rsp +; popq %rbp +; retq + +function %fcvt_to_sint_sat_snarrow(f64x2) -> i32x4 { +block0(v0: f64x2): + v1 = fcvt_to_sint_sat.i64x2 v0 + v2 = vconst.i64x2 0x00 + v3 = snarrow v1, v2 + return v3 +} + +; VCode: +; pushq %rbp +; movq %rsp, %rbp +; block0: +; vcmppd $0 %xmm0, %xmm0, %xmm2 +; movupd const(0), %xmm4 +; vandps %xmm2, %xmm4, %xmm6 +; vminpd %xmm0, %xmm6, %xmm8 +; vcvttpd2dq %xmm8, %xmm0 +; movq %rbp, %rsp +; popq %rbp +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; pushq %rbp +; movq %rsp, %rbp +; block1: ; offset 0x4 +; vcmpeqpd %xmm0, %xmm0, %xmm2 +; movupd 0x1f(%rip), %xmm4 +; vandps %xmm4, %xmm2, %xmm6 +; vminpd %xmm6, %xmm0, %xmm8 +; vcvttpd2dq %xmm8, %xmm0 +; movq %rbp, %rsp +; popq %rbp +; retq +; addb %al, (%rax) +; addb %al, (%rax) +; addb %al, (%rax) +; addb %al, (%rax) +; addb %al, (%rax) +; addb %al, (%rax) +; addb %al, (%rax) +; addb %al, %al + diff --git a/cranelift/filetests/filetests/isa/x64/simd-arith-avx.clif b/cranelift/filetests/filetests/isa/x64/simd-arith-avx.clif index fa6ceda2c6..3232802602 100644 --- a/cranelift/filetests/filetests/isa/x64/simd-arith-avx.clif +++ b/cranelift/filetests/filetests/isa/x64/simd-arith-avx.clif @@ 
-1213,7 +1213,7 @@ block0(v0: i8x16): ; movq %rsp, %rbp ; block0: ; vpalignr $8 %xmm0, %xmm0, %xmm2 -; pmovzxbw %xmm2, %xmm0 +; vpmovzxbw %xmm2, %xmm0 ; movq %rbp, %rsp ; popq %rbp ; ret @@ -1224,7 +1224,7 @@ block0(v0: i8x16): ; movq %rsp, %rbp ; block1: ; offset 0x4 ; vpalignr $8, %xmm0, %xmm0, %xmm2 -; pmovzxbw %xmm2, %xmm0 +; vpmovzxbw %xmm2, %xmm0 ; movq %rbp, %rsp ; popq %rbp ; retq @@ -1359,7 +1359,7 @@ block0(v0: f64x2): ; vmaxpd %xmm0, %xmm2, %xmm4 ; movupd const(0), %xmm6 ; vminpd %xmm4, %xmm6, %xmm8 -; roundpd $3, %xmm8, %xmm10 +; vroundpd $3, %xmm8, %xmm10 ; movupd const(1), %xmm12 ; vaddpd %xmm10, %xmm12, %xmm14 ; vshufps $136 %xmm14, %xmm2, %xmm0 @@ -1376,8 +1376,8 @@ block0(v0: f64x2): ; vmaxpd %xmm2, %xmm0, %xmm4 ; movupd 0x2c(%rip), %xmm6 ; vminpd %xmm6, %xmm4, %xmm8 -; roundpd $3, %xmm8, %xmm10 -; movupd 0x28(%rip), %xmm12 +; vroundpd $3, %xmm8, %xmm10 +; movupd 0x29(%rip), %xmm12 ; vaddpd %xmm12, %xmm10, %xmm14 ; vshufps $0x88, %xmm2, %xmm14, %xmm0 ; movq %rbp, %rsp @@ -1388,7 +1388,8 @@ block0(v0: f64x2): ; addb %al, (%rax) ; addb %al, (%rax) ; addb %al, (%rax) -; addb %ah, %al +; addb %al, (%rax) +; loopne 0x43 function %i8x16_shl(i8x16, i32) -> i8x16 { block0(v0: i8x16, v1: i32): @@ -1884,3 +1885,78 @@ block0(v0: i64x2): ; popq %rbp ; retq +function %i8x16_abs(i8x16) -> i8x16 { +block0(v0: i8x16): + v1 = iabs v0 + return v1 +} + +; VCode: +; pushq %rbp +; movq %rsp, %rbp +; block0: +; vpabsb %xmm0, %xmm0 +; movq %rbp, %rsp +; popq %rbp +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; pushq %rbp +; movq %rsp, %rbp +; block1: ; offset 0x4 +; vpabsb %xmm0, %xmm0 +; movq %rbp, %rsp +; popq %rbp +; retq + +function %i16x8_abs(i16x8) -> i16x8 { +block0(v0: i16x8): + v1 = iabs v0 + return v1 +} + +; VCode: +; pushq %rbp +; movq %rsp, %rbp +; block0: +; vpabsw %xmm0, %xmm0 +; movq %rbp, %rsp +; popq %rbp +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; pushq %rbp +; movq %rsp, %rbp +; block1: ; offset 0x4 +; vpabsw %xmm0, %xmm0 +; movq %rbp, %rsp +; popq %rbp +; retq + +function %i32x4_abs(i32x4) -> i32x4 { +block0(v0: i32x4): + v1 = iabs v0 + return v1 +} + +; VCode: +; pushq %rbp +; movq %rsp, %rbp +; block0: +; vpabsd %xmm0, %xmm0 +; movq %rbp, %rsp +; popq %rbp +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; pushq %rbp +; movq %rsp, %rbp +; block1: ; offset 0x4 +; vpabsd %xmm0, %xmm0 +; movq %rbp, %rsp +; popq %rbp +; retq + diff --git a/cranelift/filetests/filetests/isa/x64/simd-load-avx.clif b/cranelift/filetests/filetests/isa/x64/simd-load-avx.clif new file mode 100644 index 0000000000..df4f25a996 --- /dev/null +++ b/cranelift/filetests/filetests/isa/x64/simd-load-avx.clif @@ -0,0 +1,154 @@ +test compile precise-output +set enable_simd +target x86_64 has_avx + +function %sload8x8(i64) -> i16x8 { +block0(v0: i64): + v1 = sload8x8 v0 + return v1 +} + +; VCode: +; pushq %rbp +; movq %rsp, %rbp +; block0: +; vpmovsxbw 0(%rdi), %xmm0 +; movq %rbp, %rsp +; popq %rbp +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; pushq %rbp +; movq %rsp, %rbp +; block1: ; offset 0x4 +; vpmovsxbw (%rdi), %xmm0 ; trap: heap_oob +; movq %rbp, %rsp +; popq %rbp +; retq + +function %uload8x8(i64) -> i16x8 { +block0(v0: i64): + v1 = uload8x8 v0 + return v1 +} + +; VCode: +; pushq %rbp +; movq %rsp, %rbp +; block0: +; vpmovzxbw 0(%rdi), %xmm0 +; movq %rbp, %rsp +; popq %rbp +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; pushq %rbp +; movq %rsp, %rbp +; block1: ; offset 0x4 +; vpmovzxbw (%rdi), %xmm0 ; trap: heap_oob +; movq %rbp, %rsp +; popq %rbp +; retq + +function 
%sload16x4(i64) -> i32x4 { +block0(v0: i64): + v1 = sload16x4 v0 + return v1 +} + +; VCode: +; pushq %rbp +; movq %rsp, %rbp +; block0: +; vpmovsxwd 0(%rdi), %xmm0 +; movq %rbp, %rsp +; popq %rbp +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; pushq %rbp +; movq %rsp, %rbp +; block1: ; offset 0x4 +; vpmovsxwd (%rdi), %xmm0 ; trap: heap_oob +; movq %rbp, %rsp +; popq %rbp +; retq + +function %uload16x4(i64) -> i32x4 { +block0(v0: i64): + v1 = uload16x4 v0 + return v1 +} + +; VCode: +; pushq %rbp +; movq %rsp, %rbp +; block0: +; vpmovzxwd 0(%rdi), %xmm0 +; movq %rbp, %rsp +; popq %rbp +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; pushq %rbp +; movq %rsp, %rbp +; block1: ; offset 0x4 +; vpmovzxwd (%rdi), %xmm0 ; trap: heap_oob +; movq %rbp, %rsp +; popq %rbp +; retq + +function %sload32x2(i64) -> i64x2 { +block0(v0: i64): + v1 = sload32x2 v0 + return v1 +} + +; VCode: +; pushq %rbp +; movq %rsp, %rbp +; block0: +; vpmovsxdq 0(%rdi), %xmm0 +; movq %rbp, %rsp +; popq %rbp +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; pushq %rbp +; movq %rsp, %rbp +; block1: ; offset 0x4 +; vpmovsxdq (%rdi), %xmm0 ; trap: heap_oob +; movq %rbp, %rsp +; popq %rbp +; retq + +function %uload32x2(i64) -> i64x2 { +block0(v0: i64): + v1 = uload32x2 v0 + return v1 +} + +; VCode: +; pushq %rbp +; movq %rsp, %rbp +; block0: +; vpmovzxdq 0(%rdi), %xmm0 +; movq %rbp, %rsp +; popq %rbp +; ret +; +; Disassembled: +; block0: ; offset 0x0 +; pushq %rbp +; movq %rsp, %rbp +; block1: ; offset 0x4 +; vpmovzxdq (%rdi), %xmm0 ; trap: heap_oob +; movq %rbp, %rsp +; popq %rbp +; retq +