x64: Remove conditional SseOpcode::uses_src1 (#5842)
This is a follow-up to comments in #5795 to remove some cruft in the x64 instruction model to ensure that the shape of an `Inst` reflects what's going to happen in regalloc and encoding. This accessor was used to handle `round*`, `pextr*`, and `pshufd` instructions. The `round*` ones had already moved to the appropriate `XmmUnary*` variant and `pshufd` was additionally moved over to that variant as well. The `pextr*` instructions got a new `Inst` variant and additionally had their constructors slightly modified to no longer require the type as input. The encoding for these instructions now automatically handles the various type-related operand sizes through a new `SseOpcode::Pextrq` opcode to represent 64-bit movements.
This commit is contained in:
@@ -331,6 +331,12 @@
|
|||||||
(dst WritableGpr)
|
(dst WritableGpr)
|
||||||
(dst_size OperandSize))
|
(dst_size OperandSize))
|
||||||
|
|
||||||
|
;; XMM (scalar) unary op (from xmm to integer reg): pextr{w,b,d,q}
|
||||||
|
(XmmToGprImm (op SseOpcode)
|
||||||
|
(src Xmm)
|
||||||
|
(dst WritableGpr)
|
||||||
|
(imm u8))
|
||||||
|
|
||||||
;; XMM (scalar) unary op (from integer to float reg): movd, movq,
|
;; XMM (scalar) unary op (from integer to float reg): movd, movq,
|
||||||
;; cvtsi2s{s,d}
|
;; cvtsi2s{s,d}
|
||||||
(GprToXmm (op SseOpcode)
|
(GprToXmm (op SseOpcode)
|
||||||
@@ -749,6 +755,7 @@
|
|||||||
Pextrb
|
Pextrb
|
||||||
Pextrw
|
Pextrw
|
||||||
Pextrd
|
Pextrd
|
||||||
|
Pextrq
|
||||||
Pinsrb
|
Pinsrb
|
||||||
Pinsrw
|
Pinsrw
|
||||||
Pinsrd
|
Pinsrd
|
||||||
@@ -3110,16 +3117,9 @@
|
|||||||
(xmm_rmr_imm_vex (AvxOpcode.Vinsertps) src1 src2 lane))
|
(xmm_rmr_imm_vex (AvxOpcode.Vinsertps) src1 src2 lane))
|
||||||
|
|
||||||
;; Helper for creating `pshufd` instructions.
|
;; Helper for creating `pshufd` instructions.
|
||||||
(decl x64_pshufd (XmmMem u8 OperandSize) Xmm)
|
(decl x64_pshufd (XmmMem u8) Xmm)
|
||||||
(rule (x64_pshufd src imm size)
|
(rule (x64_pshufd src imm)
|
||||||
(let ((dst WritableXmm (temp_writable_xmm))
|
(xmm_unary_rm_r_imm (SseOpcode.Pshufd) src imm))
|
||||||
(_ Unit (emit (MInst.XmmRmRImm (SseOpcode.Pshufd)
|
|
||||||
dst
|
|
||||||
src
|
|
||||||
dst
|
|
||||||
imm
|
|
||||||
size))))
|
|
||||||
dst))
|
|
||||||
|
|
||||||
;; Helper for creating `pshufb` instructions.
|
;; Helper for creating `pshufb` instructions.
|
||||||
(decl x64_pshufb (Xmm XmmMem) Xmm)
|
(decl x64_pshufb (Xmm XmmMem) Xmm)
|
||||||
@@ -3314,40 +3314,24 @@
|
|||||||
(xmm_rmir_vex (AvxOpcode.Vpsrad) src1 src2))
|
(xmm_rmir_vex (AvxOpcode.Vpsrad) src1 src2))
|
||||||
|
|
||||||
;; Helper for creating `pextrb` instructions.
|
;; Helper for creating `pextrb` instructions.
|
||||||
(decl x64_pextrb (Type Xmm u8) Gpr)
|
(decl x64_pextrb (Xmm u8) Gpr)
|
||||||
(rule (x64_pextrb ty src lane)
|
(rule (x64_pextrb src lane)
|
||||||
(let ((dst WritableGpr (temp_writable_gpr))
|
(xmm_to_gpr_imm (SseOpcode.Pextrb) src lane))
|
||||||
(_ Unit (emit (MInst.XmmRmRImm (SseOpcode.Pextrb)
|
|
||||||
dst
|
|
||||||
src
|
|
||||||
dst
|
|
||||||
lane
|
|
||||||
(operand_size_of_type_32_64 (lane_type ty))))))
|
|
||||||
dst))
|
|
||||||
|
|
||||||
;; Helper for creating `pextrw` instructions.
|
;; Helper for creating `pextrw` instructions.
|
||||||
(decl x64_pextrw (Type Xmm u8) Gpr)
|
(decl x64_pextrw (Xmm u8) Gpr)
|
||||||
(rule (x64_pextrw ty src lane)
|
(rule (x64_pextrw src lane)
|
||||||
(let ((dst WritableGpr (temp_writable_gpr))
|
(xmm_to_gpr_imm (SseOpcode.Pextrw) src lane))
|
||||||
(_ Unit (emit (MInst.XmmRmRImm (SseOpcode.Pextrw)
|
|
||||||
dst
|
|
||||||
src
|
|
||||||
dst
|
|
||||||
lane
|
|
||||||
(operand_size_of_type_32_64 (lane_type ty))))))
|
|
||||||
dst))
|
|
||||||
|
|
||||||
;; Helper for creating `pextrd` instructions.
|
;; Helper for creating `pextrd` instructions.
|
||||||
(decl x64_pextrd (Type Xmm u8) Gpr)
|
(decl x64_pextrd (Xmm u8) Gpr)
|
||||||
(rule (x64_pextrd ty src lane)
|
(rule (x64_pextrd src lane)
|
||||||
(let ((dst WritableGpr (temp_writable_gpr))
|
(xmm_to_gpr_imm (SseOpcode.Pextrd) src lane))
|
||||||
(_ Unit (emit (MInst.XmmRmRImm (SseOpcode.Pextrd)
|
|
||||||
dst
|
;; Helper for creating `pextrq` instructions.
|
||||||
src
|
(decl x64_pextrq (Xmm u8) Gpr)
|
||||||
dst
|
(rule (x64_pextrq src lane)
|
||||||
lane
|
(xmm_to_gpr_imm (SseOpcode.Pextrq) src lane))
|
||||||
(operand_size_of_type_32_64 (lane_type ty))))))
|
|
||||||
dst))
|
|
||||||
|
|
||||||
;; Helper for creating `MInst.XmmToGpr` instructions.
|
;; Helper for creating `MInst.XmmToGpr` instructions.
|
||||||
(decl xmm_to_gpr (SseOpcode Xmm OperandSize) Gpr)
|
(decl xmm_to_gpr (SseOpcode Xmm OperandSize) Gpr)
|
||||||
@@ -3356,6 +3340,13 @@
|
|||||||
(_ Unit (emit (MInst.XmmToGpr op src dst size))))
|
(_ Unit (emit (MInst.XmmToGpr op src dst size))))
|
||||||
dst))
|
dst))
|
||||||
|
|
||||||
|
;; Helper for creating `MInst.XmmToGprImm` instructions.
|
||||||
|
(decl xmm_to_gpr_imm (SseOpcode Xmm u8) Gpr)
|
||||||
|
(rule (xmm_to_gpr_imm op src imm)
|
||||||
|
(let ((dst WritableGpr (temp_writable_gpr))
|
||||||
|
(_ Unit (emit (MInst.XmmToGprImm op src dst imm))))
|
||||||
|
dst))
|
||||||
|
|
||||||
;; Helper for creating `pmovmskb` instructions.
|
;; Helper for creating `pmovmskb` instructions.
|
||||||
(decl x64_pmovmskb (OperandSize Xmm) Gpr)
|
(decl x64_pmovmskb (OperandSize Xmm) Gpr)
|
||||||
(rule (x64_pmovmskb size src)
|
(rule (x64_pmovmskb size src)
|
||||||
|
|||||||
@@ -999,6 +999,7 @@ pub enum SseOpcode {
|
|||||||
Pextrb,
|
Pextrb,
|
||||||
Pextrw,
|
Pextrw,
|
||||||
Pextrd,
|
Pextrd,
|
||||||
|
Pextrq,
|
||||||
Pinsrb,
|
Pinsrb,
|
||||||
Pinsrw,
|
Pinsrw,
|
||||||
Pinsrd,
|
Pinsrd,
|
||||||
@@ -1237,6 +1238,7 @@ impl SseOpcode {
|
|||||||
| SseOpcode::Pcmpeqq
|
| SseOpcode::Pcmpeqq
|
||||||
| SseOpcode::Pextrb
|
| SseOpcode::Pextrb
|
||||||
| SseOpcode::Pextrd
|
| SseOpcode::Pextrd
|
||||||
|
| SseOpcode::Pextrq
|
||||||
| SseOpcode::Pinsrb
|
| SseOpcode::Pinsrb
|
||||||
| SseOpcode::Pinsrd
|
| SseOpcode::Pinsrd
|
||||||
| SseOpcode::Pmaxsb
|
| SseOpcode::Pmaxsb
|
||||||
@@ -1278,22 +1280,6 @@ impl SseOpcode {
|
|||||||
_ => 8,
|
_ => 8,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Does an XmmRmmRImm with this opcode use src1? FIXME: split
|
|
||||||
/// into separate instructions.
|
|
||||||
pub(crate) fn uses_src1(&self) -> bool {
|
|
||||||
match self {
|
|
||||||
SseOpcode::Pextrb => false,
|
|
||||||
SseOpcode::Pextrw => false,
|
|
||||||
SseOpcode::Pextrd => false,
|
|
||||||
SseOpcode::Pshufd => false,
|
|
||||||
SseOpcode::Roundss => false,
|
|
||||||
SseOpcode::Roundsd => false,
|
|
||||||
SseOpcode::Roundps => false,
|
|
||||||
SseOpcode::Roundpd => false,
|
|
||||||
_ => true,
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
impl fmt::Debug for SseOpcode {
|
impl fmt::Debug for SseOpcode {
|
||||||
@@ -1393,6 +1379,7 @@ impl fmt::Debug for SseOpcode {
|
|||||||
SseOpcode::Pextrb => "pextrb",
|
SseOpcode::Pextrb => "pextrb",
|
||||||
SseOpcode::Pextrw => "pextrw",
|
SseOpcode::Pextrw => "pextrw",
|
||||||
SseOpcode::Pextrd => "pextrd",
|
SseOpcode::Pextrd => "pextrd",
|
||||||
|
SseOpcode::Pextrq => "pextrq",
|
||||||
SseOpcode::Pinsrb => "pinsrb",
|
SseOpcode::Pinsrb => "pinsrb",
|
||||||
SseOpcode::Pinsrw => "pinsrw",
|
SseOpcode::Pinsrw => "pinsrw",
|
||||||
SseOpcode::Pinsrd => "pinsrd",
|
SseOpcode::Pinsrd => "pinsrd",
|
||||||
|
|||||||
@@ -1792,8 +1792,6 @@ pub(crate) fn emit(
|
|||||||
}
|
}
|
||||||
|
|
||||||
Inst::XmmUnaryRmRImm { op, src, dst, imm } => {
|
Inst::XmmUnaryRmRImm { op, src, dst, imm } => {
|
||||||
debug_assert!(!op.uses_src1());
|
|
||||||
|
|
||||||
let dst = allocs.next(dst.to_reg().to_reg());
|
let dst = allocs.next(dst.to_reg().to_reg());
|
||||||
let src = src.clone().to_reg_mem().with_allocs(allocs);
|
let src = src.clone().to_reg_mem().with_allocs(allocs);
|
||||||
let rex = RexFlags::clear_w();
|
let rex = RexFlags::clear_w();
|
||||||
@@ -1803,6 +1801,7 @@ pub(crate) fn emit(
|
|||||||
SseOpcode::Roundss => (LegacyPrefixes::_66, 0x0F3A0A, 3),
|
SseOpcode::Roundss => (LegacyPrefixes::_66, 0x0F3A0A, 3),
|
||||||
SseOpcode::Roundpd => (LegacyPrefixes::_66, 0x0F3A09, 3),
|
SseOpcode::Roundpd => (LegacyPrefixes::_66, 0x0F3A09, 3),
|
||||||
SseOpcode::Roundsd => (LegacyPrefixes::_66, 0x0F3A0B, 3),
|
SseOpcode::Roundsd => (LegacyPrefixes::_66, 0x0F3A0B, 3),
|
||||||
|
SseOpcode::Pshufd => (LegacyPrefixes::_66, 0x0F70, 2),
|
||||||
_ => unimplemented!("Opcode {:?} not implemented", op),
|
_ => unimplemented!("Opcode {:?} not implemented", op),
|
||||||
};
|
};
|
||||||
match src {
|
match src {
|
||||||
@@ -2458,17 +2457,10 @@ pub(crate) fn emit(
|
|||||||
imm,
|
imm,
|
||||||
size,
|
size,
|
||||||
} => {
|
} => {
|
||||||
let (src2, dst) = if !op.uses_src1() {
|
|
||||||
let dst = allocs.next(dst.to_reg());
|
|
||||||
let src2 = src2.with_allocs(allocs);
|
|
||||||
(src2, dst)
|
|
||||||
} else {
|
|
||||||
let src1 = allocs.next(*src1);
|
let src1 = allocs.next(*src1);
|
||||||
let dst = allocs.next(dst.to_reg());
|
let dst = allocs.next(dst.to_reg());
|
||||||
let src2 = src2.with_allocs(allocs);
|
let src2 = src2.with_allocs(allocs);
|
||||||
debug_assert_eq!(src1, dst);
|
debug_assert_eq!(src1, dst);
|
||||||
(src2, dst)
|
|
||||||
};
|
|
||||||
|
|
||||||
let (prefix, opcode, len) = match op {
|
let (prefix, opcode, len) = match op {
|
||||||
SseOpcode::Cmpps => (LegacyPrefixes::None, 0x0FC2, 2),
|
SseOpcode::Cmpps => (LegacyPrefixes::None, 0x0FC2, 2),
|
||||||
@@ -2480,10 +2472,6 @@ pub(crate) fn emit(
|
|||||||
SseOpcode::Pinsrb => (LegacyPrefixes::_66, 0x0F3A20, 3),
|
SseOpcode::Pinsrb => (LegacyPrefixes::_66, 0x0F3A20, 3),
|
||||||
SseOpcode::Pinsrw => (LegacyPrefixes::_66, 0x0FC4, 2),
|
SseOpcode::Pinsrw => (LegacyPrefixes::_66, 0x0FC4, 2),
|
||||||
SseOpcode::Pinsrd => (LegacyPrefixes::_66, 0x0F3A22, 3),
|
SseOpcode::Pinsrd => (LegacyPrefixes::_66, 0x0F3A22, 3),
|
||||||
SseOpcode::Pextrb => (LegacyPrefixes::_66, 0x0F3A14, 3),
|
|
||||||
SseOpcode::Pextrw => (LegacyPrefixes::_66, 0x0FC5, 2),
|
|
||||||
SseOpcode::Pextrd => (LegacyPrefixes::_66, 0x0F3A16, 3),
|
|
||||||
SseOpcode::Pshufd => (LegacyPrefixes::_66, 0x0F70, 2),
|
|
||||||
SseOpcode::Shufps => (LegacyPrefixes::None, 0x0FC6, 2),
|
SseOpcode::Shufps => (LegacyPrefixes::None, 0x0FC6, 2),
|
||||||
_ => unimplemented!("Opcode {:?} not implemented", op),
|
_ => unimplemented!("Opcode {:?} not implemented", op),
|
||||||
};
|
};
|
||||||
@@ -2566,6 +2554,26 @@ pub(crate) fn emit(
|
|||||||
emit_std_reg_reg(sink, prefix, opcode, 2, src, dst, rex);
|
emit_std_reg_reg(sink, prefix, opcode, 2, src, dst, rex);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Inst::XmmToGprImm { op, src, dst, imm } => {
|
||||||
|
use OperandSize as OS;
|
||||||
|
|
||||||
|
let src = allocs.next(src.to_reg());
|
||||||
|
let dst = allocs.next(dst.to_reg().to_reg());
|
||||||
|
|
||||||
|
let (prefix, opcode, opcode_bytes, dst_size, dst_first) = match op {
|
||||||
|
SseOpcode::Pextrb => (LegacyPrefixes::_66, 0x0F3A14, 3, OS::Size32, false),
|
||||||
|
SseOpcode::Pextrw => (LegacyPrefixes::_66, 0x0FC5, 2, OS::Size32, true),
|
||||||
|
SseOpcode::Pextrd => (LegacyPrefixes::_66, 0x0F3A16, 3, OS::Size32, false),
|
||||||
|
SseOpcode::Pextrq => (LegacyPrefixes::_66, 0x0F3A16, 3, OS::Size64, false),
|
||||||
|
_ => panic!("unexpected opcode {:?}", op),
|
||||||
|
};
|
||||||
|
let rex = RexFlags::from(dst_size);
|
||||||
|
let (src, dst) = if dst_first { (dst, src) } else { (src, dst) };
|
||||||
|
|
||||||
|
emit_std_reg_reg(sink, prefix, opcode, opcode_bytes, src, dst, rex);
|
||||||
|
sink.put1(*imm);
|
||||||
|
}
|
||||||
|
|
||||||
Inst::GprToXmm {
|
Inst::GprToXmm {
|
||||||
op,
|
op,
|
||||||
src: src_e,
|
src: src_e,
|
||||||
|
|||||||
@@ -136,6 +136,7 @@ impl Inst {
|
|||||||
| Inst::XmmRmRBlend { op, .. }
|
| Inst::XmmRmRBlend { op, .. }
|
||||||
| Inst::XmmRmRImm { op, .. }
|
| Inst::XmmRmRImm { op, .. }
|
||||||
| Inst::XmmToGpr { op, .. }
|
| Inst::XmmToGpr { op, .. }
|
||||||
|
| Inst::XmmToGprImm { op, .. }
|
||||||
| Inst::XmmUnaryRmRImm { op, .. }
|
| Inst::XmmUnaryRmRImm { op, .. }
|
||||||
| Inst::XmmUnaryRmR { op, .. }
|
| Inst::XmmUnaryRmR { op, .. }
|
||||||
| Inst::XmmConstOp { op, .. } => smallvec![op.available_from()],
|
| Inst::XmmConstOp { op, .. } => smallvec![op.available_from()],
|
||||||
@@ -1111,15 +1112,11 @@ impl PrettyPrint for Inst {
|
|||||||
size,
|
size,
|
||||||
..
|
..
|
||||||
} => {
|
} => {
|
||||||
let src1 = if op.uses_src1() {
|
let src1 = pretty_print_reg(*src1, 8, allocs);
|
||||||
pretty_print_reg(*src1, 8, allocs) + ", "
|
|
||||||
} else {
|
|
||||||
"".into()
|
|
||||||
};
|
|
||||||
let dst = pretty_print_reg(dst.to_reg(), 8, allocs);
|
let dst = pretty_print_reg(dst.to_reg(), 8, allocs);
|
||||||
let src2 = src2.pretty_print(8, allocs);
|
let src2 = src2.pretty_print(8, allocs);
|
||||||
format!(
|
format!(
|
||||||
"{} ${}, {}{}, {}",
|
"{} ${imm}, {src1}, {src2}, {dst}",
|
||||||
ljustify(format!(
|
ljustify(format!(
|
||||||
"{}{}",
|
"{}{}",
|
||||||
op.to_string(),
|
op.to_string(),
|
||||||
@@ -1129,10 +1126,6 @@ impl PrettyPrint for Inst {
|
|||||||
""
|
""
|
||||||
}
|
}
|
||||||
)),
|
)),
|
||||||
imm,
|
|
||||||
src1,
|
|
||||||
src2,
|
|
||||||
dst,
|
|
||||||
)
|
)
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -1153,6 +1146,12 @@ impl PrettyPrint for Inst {
|
|||||||
format!("{} {}, {}", ljustify(op.to_string()), src, dst)
|
format!("{} {}, {}", ljustify(op.to_string()), src, dst)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Inst::XmmToGprImm { op, src, dst, imm } => {
|
||||||
|
let src = pretty_print_reg(src.to_reg(), 8, allocs);
|
||||||
|
let dst = pretty_print_reg(dst.to_reg().to_reg(), 8, allocs);
|
||||||
|
format!("{} ${imm}, {}, {}", ljustify(op.to_string()), src, dst)
|
||||||
|
}
|
||||||
|
|
||||||
Inst::GprToXmm {
|
Inst::GprToXmm {
|
||||||
op,
|
op,
|
||||||
src,
|
src,
|
||||||
@@ -1976,24 +1975,12 @@ fn x64_get_operands<F: Fn(VReg) -> VReg>(inst: &Inst, collector: &mut OperandCol
|
|||||||
src1.get_operands(collector);
|
src1.get_operands(collector);
|
||||||
}
|
}
|
||||||
Inst::XmmRmRImm {
|
Inst::XmmRmRImm {
|
||||||
op,
|
src1, src2, dst, ..
|
||||||
src1,
|
|
||||||
src2,
|
|
||||||
dst,
|
|
||||||
..
|
|
||||||
} => {
|
} => {
|
||||||
if !op.uses_src1() {
|
|
||||||
// FIXME: split this instruction into two, so we don't
|
|
||||||
// need this awkward src1-is-only-sometimes-an-arg
|
|
||||||
// behavior.
|
|
||||||
collector.reg_def(*dst);
|
|
||||||
src2.get_operands(collector);
|
|
||||||
} else {
|
|
||||||
collector.reg_use(*src1);
|
collector.reg_use(*src1);
|
||||||
collector.reg_reuse_def(*dst, 0);
|
collector.reg_reuse_def(*dst, 0);
|
||||||
src2.get_operands(collector);
|
src2.get_operands(collector);
|
||||||
}
|
}
|
||||||
}
|
|
||||||
Inst::XmmConstOp { dst, .. } => {
|
Inst::XmmConstOp { dst, .. } => {
|
||||||
collector.reg_def(dst.to_writable_reg());
|
collector.reg_def(dst.to_writable_reg());
|
||||||
}
|
}
|
||||||
@@ -2035,7 +2022,7 @@ fn x64_get_operands<F: Fn(VReg) -> VReg>(inst: &Inst, collector: &mut OperandCol
|
|||||||
collector.reg_use(src.to_reg());
|
collector.reg_use(src.to_reg());
|
||||||
collector.reg_fixed_nonallocatable(*dst);
|
collector.reg_fixed_nonallocatable(*dst);
|
||||||
}
|
}
|
||||||
Inst::XmmToGpr { src, dst, .. } => {
|
Inst::XmmToGpr { src, dst, .. } | Inst::XmmToGprImm { src, dst, .. } => {
|
||||||
collector.reg_use(src.to_reg());
|
collector.reg_use(src.to_reg());
|
||||||
collector.reg_def(dst.to_writable_reg());
|
collector.reg_def(dst.to_writable_reg());
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -684,8 +684,8 @@
|
|||||||
;; (TODO: when EVEX support is available, add an alternate lowering here).
|
;; (TODO: when EVEX support is available, add an alternate lowering here).
|
||||||
(rule (lower (has_type $I64X2 (sshr src amt)))
|
(rule (lower (has_type $I64X2 (sshr src amt)))
|
||||||
(let ((src_ Xmm (put_in_xmm src))
|
(let ((src_ Xmm (put_in_xmm src))
|
||||||
(lo Gpr (x64_pextrd $I64 src_ 0))
|
(lo Gpr (x64_pextrq src_ 0))
|
||||||
(hi Gpr (x64_pextrd $I64 src_ 1))
|
(hi Gpr (x64_pextrq src_ 1))
|
||||||
(amt_ Imm8Gpr (put_masked_in_imm8_gpr amt $I64))
|
(amt_ Imm8Gpr (put_masked_in_imm8_gpr amt $I64))
|
||||||
(shifted_lo Gpr (x64_sar $I64 lo amt_))
|
(shifted_lo Gpr (x64_sar $I64 lo amt_))
|
||||||
(shifted_hi Gpr (x64_sar $I64 hi amt_)))
|
(shifted_hi Gpr (x64_sar $I64 hi amt_)))
|
||||||
@@ -921,12 +921,8 @@
|
|||||||
x))
|
x))
|
||||||
(swiden_high (and (value_type (multi_lane 32 4))
|
(swiden_high (and (value_type (multi_lane 32 4))
|
||||||
y)))))
|
y)))))
|
||||||
(let ((x2 Xmm (x64_pshufd x
|
(let ((x2 Xmm (x64_pshufd x 0xFA))
|
||||||
0xFA
|
(y2 Xmm (x64_pshufd y 0xFA)))
|
||||||
(OperandSize.Size32)))
|
|
||||||
(y2 Xmm (x64_pshufd y
|
|
||||||
0xFA
|
|
||||||
(OperandSize.Size32))))
|
|
||||||
(x64_pmuldq x2 y2)))
|
(x64_pmuldq x2 y2)))
|
||||||
|
|
||||||
;; Special case for `i16x8.extmul_low_i8x16_s`.
|
;; Special case for `i16x8.extmul_low_i8x16_s`.
|
||||||
@@ -957,12 +953,8 @@
|
|||||||
x))
|
x))
|
||||||
(swiden_low (and (value_type (multi_lane 32 4))
|
(swiden_low (and (value_type (multi_lane 32 4))
|
||||||
y)))))
|
y)))))
|
||||||
(let ((x2 Xmm (x64_pshufd x
|
(let ((x2 Xmm (x64_pshufd x 0x50))
|
||||||
0x50
|
(y2 Xmm (x64_pshufd y 0x50)))
|
||||||
(OperandSize.Size32)))
|
|
||||||
(y2 Xmm (x64_pshufd y
|
|
||||||
0x50
|
|
||||||
(OperandSize.Size32))))
|
|
||||||
(x64_pmuldq x2 y2)))
|
(x64_pmuldq x2 y2)))
|
||||||
|
|
||||||
;; Special case for `i16x8.extmul_high_i8x16_u`.
|
;; Special case for `i16x8.extmul_high_i8x16_u`.
|
||||||
@@ -997,12 +989,8 @@
|
|||||||
x))
|
x))
|
||||||
(uwiden_high (and (value_type (multi_lane 32 4))
|
(uwiden_high (and (value_type (multi_lane 32 4))
|
||||||
y)))))
|
y)))))
|
||||||
(let ((x2 Xmm (x64_pshufd x
|
(let ((x2 Xmm (x64_pshufd x 0xFA))
|
||||||
0xFA
|
(y2 Xmm (x64_pshufd y 0xFA)))
|
||||||
(OperandSize.Size32)))
|
|
||||||
(y2 Xmm (x64_pshufd y
|
|
||||||
0xFA
|
|
||||||
(OperandSize.Size32))))
|
|
||||||
(x64_pmuludq x2 y2)))
|
(x64_pmuludq x2 y2)))
|
||||||
|
|
||||||
;; Special case for `i16x8.extmul_low_i8x16_u`.
|
;; Special case for `i16x8.extmul_low_i8x16_u`.
|
||||||
@@ -1033,12 +1021,8 @@
|
|||||||
x))
|
x))
|
||||||
(uwiden_low (and (value_type (multi_lane 32 4))
|
(uwiden_low (and (value_type (multi_lane 32 4))
|
||||||
y)))))
|
y)))))
|
||||||
(let ((x2 Xmm (x64_pshufd x
|
(let ((x2 Xmm (x64_pshufd x 0x50))
|
||||||
0x50
|
(y2 Xmm (x64_pshufd y 0x50)))
|
||||||
(OperandSize.Size32)))
|
|
||||||
(y2 Xmm (x64_pshufd y
|
|
||||||
0x50
|
|
||||||
(OperandSize.Size32))))
|
|
||||||
(x64_pmuludq x2 y2)))
|
(x64_pmuludq x2 y2)))
|
||||||
|
|
||||||
;;;; Rules for `iabs` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
;;;; Rules for `iabs` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||||
@@ -3161,7 +3145,7 @@
|
|||||||
(x64_pmovsxwd (x64_palignr x x 8 (OperandSize.Size32)))))
|
(x64_pmovsxwd (x64_palignr x x 8 (OperandSize.Size32)))))
|
||||||
|
|
||||||
(rule (lower (has_type $I64X2 (swiden_high val @ (value_type $I32X4))))
|
(rule (lower (has_type $I64X2 (swiden_high val @ (value_type $I32X4))))
|
||||||
(x64_pmovsxdq (x64_pshufd val 0xEE (OperandSize.Size32))))
|
(x64_pmovsxdq (x64_pshufd val 0xEE)))
|
||||||
|
|
||||||
;; Rules for `uwiden_low` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
;; Rules for `uwiden_low` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||||
|
|
||||||
@@ -3185,7 +3169,7 @@
|
|||||||
(x64_pmovzxwd (x64_palignr x x 8 (OperandSize.Size32)))))
|
(x64_pmovzxwd (x64_palignr x x 8 (OperandSize.Size32)))))
|
||||||
|
|
||||||
(rule (lower (has_type $I64X2 (uwiden_high val @ (value_type $I32X4))))
|
(rule (lower (has_type $I64X2 (uwiden_high val @ (value_type $I32X4))))
|
||||||
(x64_pmovzxdq (x64_pshufd val 0xEE (OperandSize.Size32))))
|
(x64_pmovzxdq (x64_pshufd val 0xEE)))
|
||||||
|
|
||||||
;; Rules for `snarrow` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
;; Rules for `snarrow` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||||
|
|
||||||
@@ -3481,25 +3465,25 @@
|
|||||||
;; Cases 2-4 for an F32X4
|
;; Cases 2-4 for an F32X4
|
||||||
(rule 1 (lower (has_type $F32 (extractlane val @ (value_type (ty_vec128 ty))
|
(rule 1 (lower (has_type $F32 (extractlane val @ (value_type (ty_vec128 ty))
|
||||||
(u8_from_uimm8 lane))))
|
(u8_from_uimm8 lane))))
|
||||||
(x64_pshufd val lane (OperandSize.Size32)))
|
(x64_pshufd val lane))
|
||||||
|
|
||||||
;; This is the only remaining case for F64X2
|
;; This is the only remaining case for F64X2
|
||||||
(rule 1 (lower (has_type $F64 (extractlane val @ (value_type (ty_vec128 ty))
|
(rule 1 (lower (has_type $F64 (extractlane val @ (value_type (ty_vec128 ty))
|
||||||
(u8_from_uimm8 1))))
|
(u8_from_uimm8 1))))
|
||||||
;; 0xee == 0b11_10_11_10
|
;; 0xee == 0b11_10_11_10
|
||||||
(x64_pshufd val 0xee (OperandSize.Size32)))
|
(x64_pshufd val 0xee))
|
||||||
|
|
||||||
(rule 0 (lower (extractlane val @ (value_type ty @ (multi_lane 8 16)) (u8_from_uimm8 lane)))
|
(rule 0 (lower (extractlane val @ (value_type ty @ (multi_lane 8 16)) (u8_from_uimm8 lane)))
|
||||||
(x64_pextrb ty val lane))
|
(x64_pextrb val lane))
|
||||||
|
|
||||||
(rule 0 (lower (extractlane val @ (value_type ty @ (multi_lane 16 8)) (u8_from_uimm8 lane)))
|
(rule 0 (lower (extractlane val @ (value_type ty @ (multi_lane 16 8)) (u8_from_uimm8 lane)))
|
||||||
(x64_pextrw ty val lane))
|
(x64_pextrw val lane))
|
||||||
|
|
||||||
(rule 0 (lower (extractlane val @ (value_type ty @ (multi_lane 32 4)) (u8_from_uimm8 lane)))
|
(rule 0 (lower (extractlane val @ (value_type ty @ (multi_lane 32 4)) (u8_from_uimm8 lane)))
|
||||||
(x64_pextrd ty val lane))
|
(x64_pextrd val lane))
|
||||||
|
|
||||||
(rule 0 (lower (extractlane val @ (value_type ty @ (multi_lane 64 2)) (u8_from_uimm8 lane)))
|
(rule 0 (lower (extractlane val @ (value_type ty @ (multi_lane 64 2)) (u8_from_uimm8 lane)))
|
||||||
(x64_pextrd ty val lane))
|
(x64_pextrq val lane))
|
||||||
|
|
||||||
;; Rules for `scalar_to_vector` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
;; Rules for `scalar_to_vector` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||||
|
|
||||||
@@ -3537,7 +3521,7 @@
|
|||||||
(vec Xmm (vec_insert_lane $I16X8 (xmm_uninit_value) src 0))
|
(vec Xmm (vec_insert_lane $I16X8 (xmm_uninit_value) src 0))
|
||||||
(vec Xmm (vec_insert_lane $I16X8 vec src 1)))
|
(vec Xmm (vec_insert_lane $I16X8 vec src 1)))
|
||||||
;; Shuffle the lowest two lanes to all other lanes.
|
;; Shuffle the lowest two lanes to all other lanes.
|
||||||
(x64_pshufd vec 0 (OperandSize.Size32))))
|
(x64_pshufd vec 0)))
|
||||||
|
|
||||||
(rule 1 (lower (has_type (multi_lane 32 4) (splat src @ (value_type (ty_scalar_float _)))))
|
(rule 1 (lower (has_type (multi_lane 32 4) (splat src @ (value_type (ty_scalar_float _)))))
|
||||||
(lower_splat_32x4 $F32X4 src))
|
(lower_splat_32x4 $F32X4 src))
|
||||||
@@ -3550,7 +3534,7 @@
|
|||||||
(let ((src RegMem src)
|
(let ((src RegMem src)
|
||||||
(vec Xmm (vec_insert_lane ty (xmm_uninit_value) src 0)))
|
(vec Xmm (vec_insert_lane ty (xmm_uninit_value) src 0)))
|
||||||
;; Shuffle the lowest lane to all other lanes.
|
;; Shuffle the lowest lane to all other lanes.
|
||||||
(x64_pshufd vec 0 (OperandSize.Size32))))
|
(x64_pshufd vec 0)))
|
||||||
|
|
||||||
(rule 1 (lower (has_type (multi_lane 64 2) (splat src @ (value_type (ty_scalar_float _)))))
|
(rule 1 (lower (has_type (multi_lane 64 2) (splat src @ (value_type (ty_scalar_float _)))))
|
||||||
(lower_splat_64x2 $F64X2 src))
|
(lower_splat_64x2 $F64X2 src))
|
||||||
|
|||||||
@@ -86,7 +86,7 @@ block0(v0: i64x2):
|
|||||||
; pushq %rbp
|
; pushq %rbp
|
||||||
; movq %rsp, %rbp
|
; movq %rsp, %rbp
|
||||||
; block0:
|
; block0:
|
||||||
; pextrd.w $1, %xmm0, %rax
|
; pextrq $1, %xmm0, %rax
|
||||||
; movq %rbp, %rsp
|
; movq %rbp, %rsp
|
||||||
; popq %rbp
|
; popq %rbp
|
||||||
; ret
|
; ret
|
||||||
|
|||||||
@@ -753,8 +753,8 @@ block0(v0: i64x2):
|
|||||||
; pushq %rbp
|
; pushq %rbp
|
||||||
; movq %rsp, %rbp
|
; movq %rsp, %rbp
|
||||||
; block0:
|
; block0:
|
||||||
; pextrd.w $0, %xmm0, %rdx
|
; pextrq $0, %xmm0, %rdx
|
||||||
; pextrd.w $1, %xmm0, %r9
|
; pextrq $1, %xmm0, %r9
|
||||||
; sarq $36, %rdx, %rdx
|
; sarq $36, %rdx, %rdx
|
||||||
; sarq $36, %r9, %r9
|
; sarq $36, %r9, %r9
|
||||||
; uninit %xmm0
|
; uninit %xmm0
|
||||||
@@ -789,8 +789,8 @@ block0(v0: i64x2, v1: i32):
|
|||||||
; pushq %rbp
|
; pushq %rbp
|
||||||
; movq %rsp, %rbp
|
; movq %rsp, %rbp
|
||||||
; block0:
|
; block0:
|
||||||
; pextrd.w $0, %xmm0, %r8
|
; pextrq $0, %xmm0, %r8
|
||||||
; pextrd.w $1, %xmm0, %r10
|
; pextrq $1, %xmm0, %r10
|
||||||
; movq %rdi, %rcx
|
; movq %rdi, %rcx
|
||||||
; sarq %cl, %r8, %r8
|
; sarq %cl, %r8, %r8
|
||||||
; sarq %cl, %r10, %r10
|
; sarq %cl, %r10, %r10
|
||||||
|
|||||||
Reference in New Issue
Block a user