Use regalloc constraints for sse blend operations (#5251)
Rather than hard-coding xmm0 as the implicit mask argument of instructions like blendvpd, pass the mask as an explicit operand and use a regalloc fixed-register constraint to place it in xmm0.
This commit is contained in:
@@ -202,6 +202,17 @@
|
|||||||
(src2 XmmMem)
|
(src2 XmmMem)
|
||||||
(dst WritableXmm))
|
(dst WritableXmm))
|
||||||
|
|
||||||
|
;; XMM (scalar or vector) blend op. The mask is used to blend between
|
||||||
|
;; src1 and src2. This differs from a use of `XmmRmR` as the mask is
|
||||||
|
;; implicitly in register xmm0; this special case exists to allow us to
|
||||||
|
;; communicate the constraint on the `mask` register to regalloc2.
|
||||||
|
(XmmRmRBlend
|
||||||
|
(op SseOpcode)
|
||||||
|
(src1 Xmm)
|
||||||
|
(src2 XmmMem)
|
||||||
|
(mask Xmm)
|
||||||
|
(dst WritableXmm))
|
||||||
|
|
||||||
;; XMM (scalar or vector) binary op that relies on the VEX prefix.
|
;; XMM (scalar or vector) binary op that relies on the VEX prefix.
|
||||||
(XmmRmRVex (op AvxOpcode)
|
(XmmRmRVex (op AvxOpcode)
|
||||||
(src1 Xmm)
|
(src1 Xmm)
|
||||||
@@ -1353,15 +1364,6 @@
|
|||||||
(decl intcc_without_eq (IntCC) IntCC)
|
(decl intcc_without_eq (IntCC) IntCC)
|
||||||
(extern constructor intcc_without_eq intcc_without_eq)
|
(extern constructor intcc_without_eq intcc_without_eq)
|
||||||
|
|
||||||
;;;; Helpers for Getting Particular Physical Registers ;;;;;;;;;;;;;;;;;;;;;;;;;
|
|
||||||
;;
|
|
||||||
;; These should only be used for legalization purposes, when we can't otherwise
|
|
||||||
;; rely on something like `Inst::mov_mitosis` to put an operand into the
|
|
||||||
;; appropriate physical register for whatever reason.
|
|
||||||
|
|
||||||
(decl xmm0 () WritableXmm)
|
|
||||||
(extern constructor xmm0 xmm0)
|
|
||||||
|
|
||||||
;;;; Helpers for determining the register class of a value type ;;;;;;;;;;;;;;;;
|
;;;; Helpers for determining the register class of a value type ;;;;;;;;;;;;;;;;
|
||||||
|
|
||||||
(type RegisterClass
|
(type RegisterClass
|
||||||
@@ -2432,33 +2434,21 @@
|
|||||||
;; Priority 0 because multi_lane overlaps with the previous two type patterns.
|
;; Priority 0 because multi_lane overlaps with the previous two type patterns.
|
||||||
(rule 0 (sse_mov_op (multi_lane _bits _lanes)) (SseOpcode.Movdqa))
|
(rule 0 (sse_mov_op (multi_lane _bits _lanes)) (SseOpcode.Movdqa))
|
||||||
|
|
||||||
|
(decl xmm_rm_r_blend (SseOpcode Xmm XmmMem Xmm) Xmm)
|
||||||
|
(rule (xmm_rm_r_blend op src1 src2 mask)
|
||||||
|
(let ((dst WritableXmm (temp_writable_xmm))
|
||||||
|
(_ Unit (emit (MInst.XmmRmRBlend op src1 src2 mask dst))))
|
||||||
|
dst))
|
||||||
|
|
||||||
;; Helper for creating `blendvp{d,s}` and `pblendvb` instructions.
|
;; Helper for creating `blendvp{d,s}` and `pblendvb` instructions.
|
||||||
(decl x64_blend (Type XmmMem XmmMem Xmm) Xmm)
|
(decl x64_blend (Type Xmm XmmMem Xmm) Xmm)
|
||||||
(rule (x64_blend ty mask src1 src2)
|
(rule (x64_blend ty mask src1 src2)
|
||||||
;; Move the mask into `xmm0`, as blend instructions implicitly operate on
|
(xmm_rm_r_blend (sse_blend_op ty) src2 src1 mask))
|
||||||
;; that register. (This kind of thing would normally happen inside of
|
|
||||||
;; `Inst::mov_mitosis`, but has to happen here, where we still have the
|
|
||||||
;; mask register, because the mask is implicit and doesn't appear in the
|
|
||||||
;; `Inst` itself.)
|
|
||||||
(let ((mask2 WritableXmm (xmm0))
|
|
||||||
(_ Unit (emit (MInst.XmmUnaryRmR (sse_mov_op ty)
|
|
||||||
mask
|
|
||||||
mask2))))
|
|
||||||
(xmm_rm_r ty (sse_blend_op ty) src2 src1)))
|
|
||||||
|
|
||||||
;; Helper for creating `blendvpd` instructions.
|
;; Helper for creating `blendvpd` instructions.
|
||||||
(decl x64_blendvpd (Xmm XmmMem Xmm) Xmm)
|
(decl x64_blendvpd (Xmm XmmMem Xmm) Xmm)
|
||||||
(rule (x64_blendvpd src1 src2 mask)
|
(rule (x64_blendvpd src1 src2 mask)
|
||||||
;; Move the mask into `xmm0`, as `blendvpd` implicitly operates on that
|
(xmm_rm_r_blend (SseOpcode.Blendvpd) src1 src2 mask))
|
||||||
;; register. (This kind of thing would normally happen inside of
|
|
||||||
;; `Inst::mov_mitosis`, but has to happen here, where we still have the
|
|
||||||
;; mask register, because the mask is implicit and doesn't appear in the
|
|
||||||
;; `Inst` itself.)
|
|
||||||
(let ((mask2 WritableXmm (xmm0))
|
|
||||||
(_ Unit (emit (MInst.XmmUnaryRmR (SseOpcode.Movapd)
|
|
||||||
mask
|
|
||||||
mask2))))
|
|
||||||
(xmm_rm_r $F64X2 (SseOpcode.Blendvpd) src1 src2)))
|
|
||||||
|
|
||||||
;; Helper for creating `movsd` instructions.
|
;; Helper for creating `movsd` instructions.
|
||||||
(decl x64_movsd_regmove (Xmm XmmMem) Xmm)
|
(decl x64_movsd_regmove (Xmm XmmMem) Xmm)
|
||||||
|
|||||||
@@ -1820,8 +1820,6 @@ pub(crate) fn emit(
|
|||||||
SseOpcode::Andpd => (LegacyPrefixes::_66, 0x0F54, 2),
|
SseOpcode::Andpd => (LegacyPrefixes::_66, 0x0F54, 2),
|
||||||
SseOpcode::Andnps => (LegacyPrefixes::None, 0x0F55, 2),
|
SseOpcode::Andnps => (LegacyPrefixes::None, 0x0F55, 2),
|
||||||
SseOpcode::Andnpd => (LegacyPrefixes::_66, 0x0F55, 2),
|
SseOpcode::Andnpd => (LegacyPrefixes::_66, 0x0F55, 2),
|
||||||
SseOpcode::Blendvps => (LegacyPrefixes::_66, 0x0F3814, 3),
|
|
||||||
SseOpcode::Blendvpd => (LegacyPrefixes::_66, 0x0F3815, 3),
|
|
||||||
SseOpcode::Divps => (LegacyPrefixes::None, 0x0F5E, 2),
|
SseOpcode::Divps => (LegacyPrefixes::None, 0x0F5E, 2),
|
||||||
SseOpcode::Divpd => (LegacyPrefixes::_66, 0x0F5E, 2),
|
SseOpcode::Divpd => (LegacyPrefixes::_66, 0x0F5E, 2),
|
||||||
SseOpcode::Divss => (LegacyPrefixes::_F3, 0x0F5E, 2),
|
SseOpcode::Divss => (LegacyPrefixes::_F3, 0x0F5E, 2),
|
||||||
@@ -1859,7 +1857,6 @@ pub(crate) fn emit(
|
|||||||
SseOpcode::Pandn => (LegacyPrefixes::_66, 0x0FDF, 2),
|
SseOpcode::Pandn => (LegacyPrefixes::_66, 0x0FDF, 2),
|
||||||
SseOpcode::Pavgb => (LegacyPrefixes::_66, 0x0FE0, 2),
|
SseOpcode::Pavgb => (LegacyPrefixes::_66, 0x0FE0, 2),
|
||||||
SseOpcode::Pavgw => (LegacyPrefixes::_66, 0x0FE3, 2),
|
SseOpcode::Pavgw => (LegacyPrefixes::_66, 0x0FE3, 2),
|
||||||
SseOpcode::Pblendvb => (LegacyPrefixes::_66, 0x0F3810, 3),
|
|
||||||
SseOpcode::Pcmpeqb => (LegacyPrefixes::_66, 0x0F74, 2),
|
SseOpcode::Pcmpeqb => (LegacyPrefixes::_66, 0x0F74, 2),
|
||||||
SseOpcode::Pcmpeqw => (LegacyPrefixes::_66, 0x0F75, 2),
|
SseOpcode::Pcmpeqw => (LegacyPrefixes::_66, 0x0F75, 2),
|
||||||
SseOpcode::Pcmpeqd => (LegacyPrefixes::_66, 0x0F76, 2),
|
SseOpcode::Pcmpeqd => (LegacyPrefixes::_66, 0x0F76, 2),
|
||||||
@@ -1924,6 +1921,39 @@ pub(crate) fn emit(
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Inst::XmmRmRBlend {
|
||||||
|
op,
|
||||||
|
src1,
|
||||||
|
src2,
|
||||||
|
dst,
|
||||||
|
mask,
|
||||||
|
} => {
|
||||||
|
let src1 = allocs.next(src1.to_reg());
|
||||||
|
let mask = allocs.next(mask.to_reg());
|
||||||
|
debug_assert_eq!(mask, regs::xmm0());
|
||||||
|
let reg_g = allocs.next(dst.to_reg().to_reg());
|
||||||
|
debug_assert_eq!(src1, reg_g);
|
||||||
|
let src_e = src2.clone().to_reg_mem().with_allocs(allocs);
|
||||||
|
|
||||||
|
let rex = RexFlags::clear_w();
|
||||||
|
let (prefix, opcode, length) = match op {
|
||||||
|
SseOpcode::Blendvps => (LegacyPrefixes::_66, 0x0F3814, 3),
|
||||||
|
SseOpcode::Blendvpd => (LegacyPrefixes::_66, 0x0F3815, 3),
|
||||||
|
SseOpcode::Pblendvb => (LegacyPrefixes::_66, 0x0F3810, 3),
|
||||||
|
_ => unimplemented!("Opcode {:?} not implemented", op),
|
||||||
|
};
|
||||||
|
|
||||||
|
match src_e {
|
||||||
|
RegMem::Reg { reg: reg_e } => {
|
||||||
|
emit_std_reg_reg(sink, prefix, opcode, length, reg_g, reg_e, rex);
|
||||||
|
}
|
||||||
|
RegMem::Mem { addr } => {
|
||||||
|
let addr = &addr.finalize(state, sink);
|
||||||
|
emit_std_reg_mem(sink, info, prefix, opcode, length, reg_g, addr, rex, 0);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
Inst::XmmRmRVex {
|
Inst::XmmRmRVex {
|
||||||
op,
|
op,
|
||||||
src1,
|
src1,
|
||||||
|
|||||||
@@ -131,6 +131,16 @@ impl Inst {
|
|||||||
size,
|
size,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
fn xmm_rm_r_blend(op: SseOpcode, src2: RegMem, dst: Writable<Reg>) -> Inst {
|
||||||
|
Inst::XmmRmRBlend {
|
||||||
|
op,
|
||||||
|
src1: Xmm::new(dst.to_reg()).unwrap(),
|
||||||
|
src2: XmmMem::new(src2).unwrap(),
|
||||||
|
mask: Xmm::new(regs::xmm0()).unwrap(),
|
||||||
|
dst: WritableXmm::from_writable_reg(dst).unwrap(),
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
@@ -3961,19 +3971,19 @@ fn test_x64_emit() {
|
|||||||
));
|
));
|
||||||
|
|
||||||
insns.push((
|
insns.push((
|
||||||
Inst::xmm_rm_r(SseOpcode::Blendvpd, RegMem::reg(xmm15), w_xmm4),
|
Inst::xmm_rm_r_blend(SseOpcode::Blendvpd, RegMem::reg(xmm15), w_xmm4),
|
||||||
"66410F3815E7",
|
"66410F3815E7",
|
||||||
"blendvpd %xmm4, %xmm15, %xmm4",
|
"blendvpd %xmm4, %xmm15, %xmm4",
|
||||||
));
|
));
|
||||||
|
|
||||||
insns.push((
|
insns.push((
|
||||||
Inst::xmm_rm_r(SseOpcode::Blendvps, RegMem::reg(xmm2), w_xmm3),
|
Inst::xmm_rm_r_blend(SseOpcode::Blendvps, RegMem::reg(xmm2), w_xmm3),
|
||||||
"660F3814DA",
|
"660F3814DA",
|
||||||
"blendvps %xmm3, %xmm2, %xmm3",
|
"blendvps %xmm3, %xmm2, %xmm3",
|
||||||
));
|
));
|
||||||
|
|
||||||
insns.push((
|
insns.push((
|
||||||
Inst::xmm_rm_r(SseOpcode::Pblendvb, RegMem::reg(xmm12), w_xmm13),
|
Inst::xmm_rm_r_blend(SseOpcode::Pblendvb, RegMem::reg(xmm12), w_xmm13),
|
||||||
"66450F3810EC",
|
"66450F3810EC",
|
||||||
"pblendvb %xmm13, %xmm12, %xmm13",
|
"pblendvb %xmm13, %xmm12, %xmm13",
|
||||||
));
|
));
|
||||||
|
|||||||
@@ -3,7 +3,7 @@
|
|||||||
use crate::binemit::{Addend, CodeOffset, Reloc, StackMap};
|
use crate::binemit::{Addend, CodeOffset, Reloc, StackMap};
|
||||||
use crate::ir::{types, ExternalName, LibCall, Opcode, RelSourceLoc, TrapCode, Type};
|
use crate::ir::{types, ExternalName, LibCall, Opcode, RelSourceLoc, TrapCode, Type};
|
||||||
use crate::isa::x64::abi::X64ABIMachineSpec;
|
use crate::isa::x64::abi::X64ABIMachineSpec;
|
||||||
use crate::isa::x64::inst::regs::pretty_print_reg;
|
use crate::isa::x64::inst::regs::{pretty_print_reg, show_ireg_sized};
|
||||||
use crate::isa::x64::settings as x64_settings;
|
use crate::isa::x64::settings as x64_settings;
|
||||||
use crate::isa::CallConv;
|
use crate::isa::CallConv;
|
||||||
use crate::{machinst::*, trace};
|
use crate::{machinst::*, trace};
|
||||||
@@ -130,6 +130,7 @@ impl Inst {
|
|||||||
| Inst::XmmMovRM { op, .. }
|
| Inst::XmmMovRM { op, .. }
|
||||||
| Inst::XmmRmiReg { opcode: op, .. }
|
| Inst::XmmRmiReg { opcode: op, .. }
|
||||||
| Inst::XmmRmR { op, .. }
|
| Inst::XmmRmR { op, .. }
|
||||||
|
| Inst::XmmRmRBlend { op, .. }
|
||||||
| Inst::XmmRmRImm { op, .. }
|
| Inst::XmmRmRImm { op, .. }
|
||||||
| Inst::XmmToGpr { op, .. }
|
| Inst::XmmToGpr { op, .. }
|
||||||
| Inst::XmmUnaryRmRImm { op, .. }
|
| Inst::XmmUnaryRmRImm { op, .. }
|
||||||
@@ -938,6 +939,33 @@ impl PrettyPrint for Inst {
|
|||||||
format!("{} {}, {}, {}", ljustify(op.to_string()), src1, src2, dst)
|
format!("{} {}, {}, {}", ljustify(op.to_string()), src1, src2, dst)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Inst::XmmRmRBlend {
|
||||||
|
op,
|
||||||
|
src1,
|
||||||
|
src2,
|
||||||
|
mask,
|
||||||
|
dst,
|
||||||
|
} => {
|
||||||
|
let src1 = pretty_print_reg(src1.to_reg(), 8, allocs);
|
||||||
|
let mask = allocs.next(mask.to_reg());
|
||||||
|
let mask = if mask.is_virtual() {
|
||||||
|
format!(" <{}>", show_ireg_sized(mask, 8))
|
||||||
|
} else {
|
||||||
|
debug_assert_eq!(mask, regs::xmm0());
|
||||||
|
String::new()
|
||||||
|
};
|
||||||
|
let dst = pretty_print_reg(dst.to_reg().to_reg(), 8, allocs);
|
||||||
|
let src2 = src2.pretty_print(8, allocs);
|
||||||
|
format!(
|
||||||
|
"{} {}, {}, {}{}",
|
||||||
|
ljustify(op.to_string()),
|
||||||
|
src1,
|
||||||
|
src2,
|
||||||
|
dst,
|
||||||
|
mask
|
||||||
|
)
|
||||||
|
}
|
||||||
|
|
||||||
Inst::XmmRmRVex {
|
Inst::XmmRmRVex {
|
||||||
op,
|
op,
|
||||||
src1,
|
src1,
|
||||||
@@ -1765,11 +1793,7 @@ fn x64_get_operands<F: Fn(VReg) -> VReg>(inst: &Inst, collector: &mut OperandCol
|
|||||||
src.get_operands(collector);
|
src.get_operands(collector);
|
||||||
}
|
}
|
||||||
Inst::XmmRmR {
|
Inst::XmmRmR {
|
||||||
src1,
|
src1, src2, dst, ..
|
||||||
src2,
|
|
||||||
dst,
|
|
||||||
op,
|
|
||||||
..
|
|
||||||
} => {
|
} => {
|
||||||
if inst.produces_const() {
|
if inst.produces_const() {
|
||||||
collector.reg_def(dst.to_writable_reg());
|
collector.reg_def(dst.to_writable_reg());
|
||||||
@@ -1777,15 +1801,24 @@ fn x64_get_operands<F: Fn(VReg) -> VReg>(inst: &Inst, collector: &mut OperandCol
|
|||||||
collector.reg_use(src1.to_reg());
|
collector.reg_use(src1.to_reg());
|
||||||
collector.reg_reuse_def(dst.to_writable_reg(), 0);
|
collector.reg_reuse_def(dst.to_writable_reg(), 0);
|
||||||
src2.get_operands(collector);
|
src2.get_operands(collector);
|
||||||
|
}
|
||||||
// Some instructions have an implicit use of XMM0.
|
}
|
||||||
if *op == SseOpcode::Blendvpd
|
Inst::XmmRmRBlend {
|
||||||
|
src1,
|
||||||
|
src2,
|
||||||
|
mask,
|
||||||
|
dst,
|
||||||
|
op,
|
||||||
|
} => {
|
||||||
|
assert!(
|
||||||
|
*op == SseOpcode::Blendvpd
|
||||||
|| *op == SseOpcode::Blendvps
|
|| *op == SseOpcode::Blendvps
|
||||||
|| *op == SseOpcode::Pblendvb
|
|| *op == SseOpcode::Pblendvb
|
||||||
{
|
);
|
||||||
collector.reg_use(regs::xmm0());
|
collector.reg_use(src1.to_reg());
|
||||||
}
|
collector.reg_fixed_use(mask.to_reg(), regs::xmm0());
|
||||||
}
|
collector.reg_reuse_def(dst.to_writable_reg(), 0);
|
||||||
|
src2.get_operands(collector);
|
||||||
}
|
}
|
||||||
Inst::XmmRmRVex {
|
Inst::XmmRmRVex {
|
||||||
op,
|
op,
|
||||||
|
|||||||
@@ -336,11 +336,6 @@ impl Context for IsleContext<'_, '_, MInst, Flags, IsaFlags, 6> {
|
|||||||
0b00_00_00_00 | lane << 4
|
0b00_00_00_00 | lane << 4
|
||||||
}
|
}
|
||||||
|
|
||||||
#[inline]
|
|
||||||
fn xmm0(&mut self) -> WritableXmm {
|
|
||||||
WritableXmm::from_reg(Xmm::new(regs::xmm0()).unwrap())
|
|
||||||
}
|
|
||||||
|
|
||||||
#[inline]
|
#[inline]
|
||||||
fn synthetic_amode_to_reg_mem(&mut self, addr: &SyntheticAmode) -> RegMem {
|
fn synthetic_amode_to_reg_mem(&mut self, addr: &SyntheticAmode) -> RegMem {
|
||||||
RegMem::mem(addr.clone())
|
RegMem::mem(addr.clone())
|
||||||
|
|||||||
@@ -16,9 +16,9 @@ block0(v0: i8x16, v1: i8x16):
|
|||||||
; pcmpeqb %xmm4, %xmm1, %xmm4
|
; pcmpeqb %xmm4, %xmm1, %xmm4
|
||||||
; movdqa %xmm0, %xmm7
|
; movdqa %xmm0, %xmm7
|
||||||
; movdqa %xmm4, %xmm0
|
; movdqa %xmm4, %xmm0
|
||||||
; movdqa %xmm1, %xmm5
|
; movdqa %xmm1, %xmm4
|
||||||
; pblendvb %xmm5, %xmm7, %xmm5
|
; pblendvb %xmm4, %xmm7, %xmm4
|
||||||
; movdqa %xmm5, %xmm0
|
; movdqa %xmm4, %xmm0
|
||||||
; movq %rbp, %rsp
|
; movq %rbp, %rsp
|
||||||
; popq %rbp
|
; popq %rbp
|
||||||
; ret
|
; ret
|
||||||
@@ -34,9 +34,9 @@ block0(v0: f32x4, v1: f32x4, v2: i32x4, v3: i32x4):
|
|||||||
; movq %rsp, %rbp
|
; movq %rsp, %rbp
|
||||||
; block0:
|
; block0:
|
||||||
; cmpps $0, %xmm0, %xmm1, %xmm0
|
; cmpps $0, %xmm0, %xmm1, %xmm0
|
||||||
; movdqa %xmm3, %xmm7
|
; movdqa %xmm3, %xmm6
|
||||||
; pblendvb %xmm7, %xmm2, %xmm7
|
; pblendvb %xmm6, %xmm2, %xmm6
|
||||||
; movdqa %xmm7, %xmm0
|
; movdqa %xmm6, %xmm0
|
||||||
; movq %rbp, %rsp
|
; movq %rbp, %rsp
|
||||||
; popq %rbp
|
; popq %rbp
|
||||||
; ret
|
; ret
|
||||||
@@ -72,10 +72,10 @@ block0(v0: i8x16, v1: i8x16):
|
|||||||
; block0:
|
; block0:
|
||||||
; movdqa %xmm0, %xmm5
|
; movdqa %xmm0, %xmm5
|
||||||
; movdqu const(0), %xmm0
|
; movdqu const(0), %xmm0
|
||||||
; movdqa %xmm5, %xmm7
|
; movdqa %xmm5, %xmm6
|
||||||
; movdqa %xmm1, %xmm5
|
; movdqa %xmm1, %xmm4
|
||||||
; pblendvb %xmm5, %xmm7, %xmm5
|
; pblendvb %xmm4, %xmm6, %xmm4
|
||||||
; movdqa %xmm5, %xmm0
|
; movdqa %xmm4, %xmm0
|
||||||
; movq %rbp, %rsp
|
; movq %rbp, %rsp
|
||||||
; popq %rbp
|
; popq %rbp
|
||||||
; ret
|
; ret
|
||||||
@@ -92,10 +92,10 @@ block0(v0: i16x8, v1: i16x8):
|
|||||||
; block0:
|
; block0:
|
||||||
; movdqa %xmm0, %xmm5
|
; movdqa %xmm0, %xmm5
|
||||||
; movdqu const(0), %xmm0
|
; movdqu const(0), %xmm0
|
||||||
; movdqa %xmm5, %xmm7
|
; movdqa %xmm5, %xmm6
|
||||||
; movdqa %xmm1, %xmm5
|
; movdqa %xmm1, %xmm4
|
||||||
; pblendvb %xmm5, %xmm7, %xmm5
|
; pblendvb %xmm4, %xmm6, %xmm4
|
||||||
; movdqa %xmm5, %xmm0
|
; movdqa %xmm4, %xmm0
|
||||||
; movq %rbp, %rsp
|
; movq %rbp, %rsp
|
||||||
; popq %rbp
|
; popq %rbp
|
||||||
; ret
|
; ret
|
||||||
|
|||||||
@@ -137,9 +137,9 @@ block0(v0: i16x8, v1: i16x8, v2: i16x8):
|
|||||||
; pushq %rbp
|
; pushq %rbp
|
||||||
; movq %rsp, %rbp
|
; movq %rsp, %rbp
|
||||||
; block0:
|
; block0:
|
||||||
; movdqa %xmm2, %xmm5
|
; movdqa %xmm2, %xmm4
|
||||||
; pblendvb %xmm5, %xmm1, %xmm5
|
; pblendvb %xmm4, %xmm1, %xmm4
|
||||||
; movdqa %xmm5, %xmm0
|
; movdqa %xmm4, %xmm0
|
||||||
; movq %rbp, %rsp
|
; movq %rbp, %rsp
|
||||||
; popq %rbp
|
; popq %rbp
|
||||||
; ret
|
; ret
|
||||||
@@ -153,9 +153,9 @@ block0(v0: i32x4, v1: f32x4, v2: f32x4):
|
|||||||
; pushq %rbp
|
; pushq %rbp
|
||||||
; movq %rsp, %rbp
|
; movq %rsp, %rbp
|
||||||
; block0:
|
; block0:
|
||||||
; movdqa %xmm2, %xmm5
|
; movdqa %xmm2, %xmm4
|
||||||
; blendvps %xmm5, %xmm1, %xmm5
|
; blendvps %xmm4, %xmm1, %xmm4
|
||||||
; movdqa %xmm5, %xmm0
|
; movdqa %xmm4, %xmm0
|
||||||
; movq %rbp, %rsp
|
; movq %rbp, %rsp
|
||||||
; popq %rbp
|
; popq %rbp
|
||||||
; ret
|
; ret
|
||||||
@@ -169,9 +169,9 @@ block0(v0: i64x2, v1: f64x2, v2: f64x2):
|
|||||||
; pushq %rbp
|
; pushq %rbp
|
||||||
; movq %rsp, %rbp
|
; movq %rsp, %rbp
|
||||||
; block0:
|
; block0:
|
||||||
; movdqa %xmm2, %xmm5
|
; movdqa %xmm2, %xmm4
|
||||||
; blendvpd %xmm5, %xmm1, %xmm5
|
; blendvpd %xmm4, %xmm1, %xmm4
|
||||||
; movdqa %xmm5, %xmm0
|
; movdqa %xmm4, %xmm0
|
||||||
; movq %rbp, %rsp
|
; movq %rbp, %rsp
|
||||||
; popq %rbp
|
; popq %rbp
|
||||||
; ret
|
; ret
|
||||||
|
|||||||
Reference in New Issue
Block a user