x64: Remove conditional SseOpcode::uses_src1 (#5842)

This is a follow-up to comments in #5795 to remove some cruft in the x64
instruction model to ensure that the shape of an `Inst` reflects what's
going to happen in regalloc and encoding. This accessor was used to
handle `round*`, `pextr*`, and `pshufb` instructions. The `round*` ones
had already moved to the appropriate `XmmUnary*` variant and `pshufb`
was additionally moved over to that variant as well.

The `pextr*` instructions got a new `Inst` variant and additionally had
their constructors slightly modified to no longer require the type as
input. The encoding for these instructions now automatically handles the
various type-related operands through a new `SseOpcode::Pextrq` opcode
that represents 64-bit extractions.
This commit is contained in:
Alex Crichton
2023-02-21 12:17:07 -06:00
committed by GitHub
parent e6a5ec3fde
commit c65de1f1b1
7 changed files with 98 additions and 141 deletions

View File

@@ -331,6 +331,12 @@
(dst WritableGpr) (dst WritableGpr)
(dst_size OperandSize)) (dst_size OperandSize))
;; XMM (scalar) unary op (from xmm to integer reg): pextr{w,b,d,q}
(XmmToGprImm (op SseOpcode)
(src Xmm)
(dst WritableGpr)
(imm u8))
;; XMM (scalar) unary op (from integer to float reg): movd, movq, ;; XMM (scalar) unary op (from integer to float reg): movd, movq,
;; cvtsi2s{s,d} ;; cvtsi2s{s,d}
(GprToXmm (op SseOpcode) (GprToXmm (op SseOpcode)
@@ -749,6 +755,7 @@
Pextrb Pextrb
Pextrw Pextrw
Pextrd Pextrd
Pextrq
Pinsrb Pinsrb
Pinsrw Pinsrw
Pinsrd Pinsrd
@@ -3110,16 +3117,9 @@
(xmm_rmr_imm_vex (AvxOpcode.Vinsertps) src1 src2 lane)) (xmm_rmr_imm_vex (AvxOpcode.Vinsertps) src1 src2 lane))
;; Helper for creating `pshufd` instructions. ;; Helper for creating `pshufd` instructions.
(decl x64_pshufd (XmmMem u8 OperandSize) Xmm) (decl x64_pshufd (XmmMem u8) Xmm)
(rule (x64_pshufd src imm size) (rule (x64_pshufd src imm)
(let ((dst WritableXmm (temp_writable_xmm)) (xmm_unary_rm_r_imm (SseOpcode.Pshufd) src imm))
(_ Unit (emit (MInst.XmmRmRImm (SseOpcode.Pshufd)
dst
src
dst
imm
size))))
dst))
;; Helper for creating `pshufb` instructions. ;; Helper for creating `pshufb` instructions.
(decl x64_pshufb (Xmm XmmMem) Xmm) (decl x64_pshufb (Xmm XmmMem) Xmm)
@@ -3314,40 +3314,24 @@
(xmm_rmir_vex (AvxOpcode.Vpsrad) src1 src2)) (xmm_rmir_vex (AvxOpcode.Vpsrad) src1 src2))
;; Helper for creating `pextrb` instructions. ;; Helper for creating `pextrb` instructions.
(decl x64_pextrb (Type Xmm u8) Gpr) (decl x64_pextrb (Xmm u8) Gpr)
(rule (x64_pextrb ty src lane) (rule (x64_pextrb src lane)
(let ((dst WritableGpr (temp_writable_gpr)) (xmm_to_gpr_imm (SseOpcode.Pextrb) src lane))
(_ Unit (emit (MInst.XmmRmRImm (SseOpcode.Pextrb)
dst
src
dst
lane
(operand_size_of_type_32_64 (lane_type ty))))))
dst))
;; Helper for creating `pextrw` instructions. ;; Helper for creating `pextrw` instructions.
(decl x64_pextrw (Type Xmm u8) Gpr) (decl x64_pextrw (Xmm u8) Gpr)
(rule (x64_pextrw ty src lane) (rule (x64_pextrw src lane)
(let ((dst WritableGpr (temp_writable_gpr)) (xmm_to_gpr_imm (SseOpcode.Pextrw) src lane))
(_ Unit (emit (MInst.XmmRmRImm (SseOpcode.Pextrw)
dst
src
dst
lane
(operand_size_of_type_32_64 (lane_type ty))))))
dst))
;; Helper for creating `pextrd` instructions. ;; Helper for creating `pextrd` instructions.
(decl x64_pextrd (Type Xmm u8) Gpr) (decl x64_pextrd (Xmm u8) Gpr)
(rule (x64_pextrd ty src lane) (rule (x64_pextrd src lane)
(let ((dst WritableGpr (temp_writable_gpr)) (xmm_to_gpr_imm (SseOpcode.Pextrd) src lane))
(_ Unit (emit (MInst.XmmRmRImm (SseOpcode.Pextrd)
dst ;; Helper for creating `pextrq` instructions.
src (decl x64_pextrq (Xmm u8) Gpr)
dst (rule (x64_pextrq src lane)
lane (xmm_to_gpr_imm (SseOpcode.Pextrq) src lane))
(operand_size_of_type_32_64 (lane_type ty))))))
dst))
;; Helper for creating `MInst.XmmToGpr` instructions. ;; Helper for creating `MInst.XmmToGpr` instructions.
(decl xmm_to_gpr (SseOpcode Xmm OperandSize) Gpr) (decl xmm_to_gpr (SseOpcode Xmm OperandSize) Gpr)
@@ -3356,6 +3340,13 @@
(_ Unit (emit (MInst.XmmToGpr op src dst size)))) (_ Unit (emit (MInst.XmmToGpr op src dst size))))
dst)) dst))
;; Helper for creating `MInst.XmmToGpr` instructions.
(decl xmm_to_gpr_imm (SseOpcode Xmm u8) Gpr)
(rule (xmm_to_gpr_imm op src imm)
(let ((dst WritableGpr (temp_writable_gpr))
(_ Unit (emit (MInst.XmmToGprImm op src dst imm))))
dst))
;; Helper for creating `pmovmskb` instructions. ;; Helper for creating `pmovmskb` instructions.
(decl x64_pmovmskb (OperandSize Xmm) Gpr) (decl x64_pmovmskb (OperandSize Xmm) Gpr)
(rule (x64_pmovmskb size src) (rule (x64_pmovmskb size src)

View File

@@ -999,6 +999,7 @@ pub enum SseOpcode {
Pextrb, Pextrb,
Pextrw, Pextrw,
Pextrd, Pextrd,
Pextrq,
Pinsrb, Pinsrb,
Pinsrw, Pinsrw,
Pinsrd, Pinsrd,
@@ -1237,6 +1238,7 @@ impl SseOpcode {
| SseOpcode::Pcmpeqq | SseOpcode::Pcmpeqq
| SseOpcode::Pextrb | SseOpcode::Pextrb
| SseOpcode::Pextrd | SseOpcode::Pextrd
| SseOpcode::Pextrq
| SseOpcode::Pinsrb | SseOpcode::Pinsrb
| SseOpcode::Pinsrd | SseOpcode::Pinsrd
| SseOpcode::Pmaxsb | SseOpcode::Pmaxsb
@@ -1278,22 +1280,6 @@ impl SseOpcode {
_ => 8, _ => 8,
} }
} }
/// Does an XmmRmmRImm with this opcode use src1? FIXME: split
/// into separate instructions.
pub(crate) fn uses_src1(&self) -> bool {
match self {
SseOpcode::Pextrb => false,
SseOpcode::Pextrw => false,
SseOpcode::Pextrd => false,
SseOpcode::Pshufd => false,
SseOpcode::Roundss => false,
SseOpcode::Roundsd => false,
SseOpcode::Roundps => false,
SseOpcode::Roundpd => false,
_ => true,
}
}
} }
impl fmt::Debug for SseOpcode { impl fmt::Debug for SseOpcode {
@@ -1393,6 +1379,7 @@ impl fmt::Debug for SseOpcode {
SseOpcode::Pextrb => "pextrb", SseOpcode::Pextrb => "pextrb",
SseOpcode::Pextrw => "pextrw", SseOpcode::Pextrw => "pextrw",
SseOpcode::Pextrd => "pextrd", SseOpcode::Pextrd => "pextrd",
SseOpcode::Pextrq => "pextrq",
SseOpcode::Pinsrb => "pinsrb", SseOpcode::Pinsrb => "pinsrb",
SseOpcode::Pinsrw => "pinsrw", SseOpcode::Pinsrw => "pinsrw",
SseOpcode::Pinsrd => "pinsrd", SseOpcode::Pinsrd => "pinsrd",

View File

@@ -1792,8 +1792,6 @@ pub(crate) fn emit(
} }
Inst::XmmUnaryRmRImm { op, src, dst, imm } => { Inst::XmmUnaryRmRImm { op, src, dst, imm } => {
debug_assert!(!op.uses_src1());
let dst = allocs.next(dst.to_reg().to_reg()); let dst = allocs.next(dst.to_reg().to_reg());
let src = src.clone().to_reg_mem().with_allocs(allocs); let src = src.clone().to_reg_mem().with_allocs(allocs);
let rex = RexFlags::clear_w(); let rex = RexFlags::clear_w();
@@ -1803,6 +1801,7 @@ pub(crate) fn emit(
SseOpcode::Roundss => (LegacyPrefixes::_66, 0x0F3A0A, 3), SseOpcode::Roundss => (LegacyPrefixes::_66, 0x0F3A0A, 3),
SseOpcode::Roundpd => (LegacyPrefixes::_66, 0x0F3A09, 3), SseOpcode::Roundpd => (LegacyPrefixes::_66, 0x0F3A09, 3),
SseOpcode::Roundsd => (LegacyPrefixes::_66, 0x0F3A0B, 3), SseOpcode::Roundsd => (LegacyPrefixes::_66, 0x0F3A0B, 3),
SseOpcode::Pshufd => (LegacyPrefixes::_66, 0x0F70, 2),
_ => unimplemented!("Opcode {:?} not implemented", op), _ => unimplemented!("Opcode {:?} not implemented", op),
}; };
match src { match src {
@@ -2458,17 +2457,10 @@ pub(crate) fn emit(
imm, imm,
size, size,
} => { } => {
let (src2, dst) = if !op.uses_src1() { let src1 = allocs.next(*src1);
let dst = allocs.next(dst.to_reg()); let dst = allocs.next(dst.to_reg());
let src2 = src2.with_allocs(allocs); let src2 = src2.with_allocs(allocs);
(src2, dst) debug_assert_eq!(src1, dst);
} else {
let src1 = allocs.next(*src1);
let dst = allocs.next(dst.to_reg());
let src2 = src2.with_allocs(allocs);
debug_assert_eq!(src1, dst);
(src2, dst)
};
let (prefix, opcode, len) = match op { let (prefix, opcode, len) = match op {
SseOpcode::Cmpps => (LegacyPrefixes::None, 0x0FC2, 2), SseOpcode::Cmpps => (LegacyPrefixes::None, 0x0FC2, 2),
@@ -2480,10 +2472,6 @@ pub(crate) fn emit(
SseOpcode::Pinsrb => (LegacyPrefixes::_66, 0x0F3A20, 3), SseOpcode::Pinsrb => (LegacyPrefixes::_66, 0x0F3A20, 3),
SseOpcode::Pinsrw => (LegacyPrefixes::_66, 0x0FC4, 2), SseOpcode::Pinsrw => (LegacyPrefixes::_66, 0x0FC4, 2),
SseOpcode::Pinsrd => (LegacyPrefixes::_66, 0x0F3A22, 3), SseOpcode::Pinsrd => (LegacyPrefixes::_66, 0x0F3A22, 3),
SseOpcode::Pextrb => (LegacyPrefixes::_66, 0x0F3A14, 3),
SseOpcode::Pextrw => (LegacyPrefixes::_66, 0x0FC5, 2),
SseOpcode::Pextrd => (LegacyPrefixes::_66, 0x0F3A16, 3),
SseOpcode::Pshufd => (LegacyPrefixes::_66, 0x0F70, 2),
SseOpcode::Shufps => (LegacyPrefixes::None, 0x0FC6, 2), SseOpcode::Shufps => (LegacyPrefixes::None, 0x0FC6, 2),
_ => unimplemented!("Opcode {:?} not implemented", op), _ => unimplemented!("Opcode {:?} not implemented", op),
}; };
@@ -2566,6 +2554,26 @@ pub(crate) fn emit(
emit_std_reg_reg(sink, prefix, opcode, 2, src, dst, rex); emit_std_reg_reg(sink, prefix, opcode, 2, src, dst, rex);
} }
Inst::XmmToGprImm { op, src, dst, imm } => {
use OperandSize as OS;
let src = allocs.next(src.to_reg());
let dst = allocs.next(dst.to_reg().to_reg());
let (prefix, opcode, opcode_bytes, dst_size, dst_first) = match op {
SseOpcode::Pextrb => (LegacyPrefixes::_66, 0x0F3A14, 3, OS::Size32, false),
SseOpcode::Pextrw => (LegacyPrefixes::_66, 0x0FC5, 2, OS::Size32, true),
SseOpcode::Pextrd => (LegacyPrefixes::_66, 0x0F3A16, 3, OS::Size32, false),
SseOpcode::Pextrq => (LegacyPrefixes::_66, 0x0F3A16, 3, OS::Size64, false),
_ => panic!("unexpected opcode {:?}", op),
};
let rex = RexFlags::from(dst_size);
let (src, dst) = if dst_first { (dst, src) } else { (src, dst) };
emit_std_reg_reg(sink, prefix, opcode, opcode_bytes, src, dst, rex);
sink.put1(*imm);
}
Inst::GprToXmm { Inst::GprToXmm {
op, op,
src: src_e, src: src_e,

View File

@@ -136,6 +136,7 @@ impl Inst {
| Inst::XmmRmRBlend { op, .. } | Inst::XmmRmRBlend { op, .. }
| Inst::XmmRmRImm { op, .. } | Inst::XmmRmRImm { op, .. }
| Inst::XmmToGpr { op, .. } | Inst::XmmToGpr { op, .. }
| Inst::XmmToGprImm { op, .. }
| Inst::XmmUnaryRmRImm { op, .. } | Inst::XmmUnaryRmRImm { op, .. }
| Inst::XmmUnaryRmR { op, .. } | Inst::XmmUnaryRmR { op, .. }
| Inst::XmmConstOp { op, .. } => smallvec![op.available_from()], | Inst::XmmConstOp { op, .. } => smallvec![op.available_from()],
@@ -1111,15 +1112,11 @@ impl PrettyPrint for Inst {
size, size,
.. ..
} => { } => {
let src1 = if op.uses_src1() { let src1 = pretty_print_reg(*src1, 8, allocs);
pretty_print_reg(*src1, 8, allocs) + ", "
} else {
"".into()
};
let dst = pretty_print_reg(dst.to_reg(), 8, allocs); let dst = pretty_print_reg(dst.to_reg(), 8, allocs);
let src2 = src2.pretty_print(8, allocs); let src2 = src2.pretty_print(8, allocs);
format!( format!(
"{} ${}, {}{}, {}", "{} ${imm}, {src1}, {src2}, {dst}",
ljustify(format!( ljustify(format!(
"{}{}", "{}{}",
op.to_string(), op.to_string(),
@@ -1129,10 +1126,6 @@ impl PrettyPrint for Inst {
"" ""
} }
)), )),
imm,
src1,
src2,
dst,
) )
} }
@@ -1153,6 +1146,12 @@ impl PrettyPrint for Inst {
format!("{} {}, {}", ljustify(op.to_string()), src, dst) format!("{} {}, {}", ljustify(op.to_string()), src, dst)
} }
Inst::XmmToGprImm { op, src, dst, imm } => {
let src = pretty_print_reg(src.to_reg(), 8, allocs);
let dst = pretty_print_reg(dst.to_reg().to_reg(), 8, allocs);
format!("{} ${imm}, {}, {}", ljustify(op.to_string()), src, dst)
}
Inst::GprToXmm { Inst::GprToXmm {
op, op,
src, src,
@@ -1976,23 +1975,11 @@ fn x64_get_operands<F: Fn(VReg) -> VReg>(inst: &Inst, collector: &mut OperandCol
src1.get_operands(collector); src1.get_operands(collector);
} }
Inst::XmmRmRImm { Inst::XmmRmRImm {
op, src1, src2, dst, ..
src1,
src2,
dst,
..
} => { } => {
if !op.uses_src1() { collector.reg_use(*src1);
// FIXME: split this instruction into two, so we don't collector.reg_reuse_def(*dst, 0);
// need this awkward src1-is-only-sometimes-an-arg src2.get_operands(collector);
// behavior.
collector.reg_def(*dst);
src2.get_operands(collector);
} else {
collector.reg_use(*src1);
collector.reg_reuse_def(*dst, 0);
src2.get_operands(collector);
}
} }
Inst::XmmConstOp { dst, .. } => { Inst::XmmConstOp { dst, .. } => {
collector.reg_def(dst.to_writable_reg()); collector.reg_def(dst.to_writable_reg());
@@ -2035,7 +2022,7 @@ fn x64_get_operands<F: Fn(VReg) -> VReg>(inst: &Inst, collector: &mut OperandCol
collector.reg_use(src.to_reg()); collector.reg_use(src.to_reg());
collector.reg_fixed_nonallocatable(*dst); collector.reg_fixed_nonallocatable(*dst);
} }
Inst::XmmToGpr { src, dst, .. } => { Inst::XmmToGpr { src, dst, .. } | Inst::XmmToGprImm { src, dst, .. } => {
collector.reg_use(src.to_reg()); collector.reg_use(src.to_reg());
collector.reg_def(dst.to_writable_reg()); collector.reg_def(dst.to_writable_reg());
} }

View File

@@ -684,8 +684,8 @@
;; (TODO: when EVEX support is available, add an alternate lowering here). ;; (TODO: when EVEX support is available, add an alternate lowering here).
(rule (lower (has_type $I64X2 (sshr src amt))) (rule (lower (has_type $I64X2 (sshr src amt)))
(let ((src_ Xmm (put_in_xmm src)) (let ((src_ Xmm (put_in_xmm src))
(lo Gpr (x64_pextrd $I64 src_ 0)) (lo Gpr (x64_pextrq src_ 0))
(hi Gpr (x64_pextrd $I64 src_ 1)) (hi Gpr (x64_pextrq src_ 1))
(amt_ Imm8Gpr (put_masked_in_imm8_gpr amt $I64)) (amt_ Imm8Gpr (put_masked_in_imm8_gpr amt $I64))
(shifted_lo Gpr (x64_sar $I64 lo amt_)) (shifted_lo Gpr (x64_sar $I64 lo amt_))
(shifted_hi Gpr (x64_sar $I64 hi amt_))) (shifted_hi Gpr (x64_sar $I64 hi amt_)))
@@ -921,12 +921,8 @@
x)) x))
(swiden_high (and (value_type (multi_lane 32 4)) (swiden_high (and (value_type (multi_lane 32 4))
y))))) y)))))
(let ((x2 Xmm (x64_pshufd x (let ((x2 Xmm (x64_pshufd x 0xFA))
0xFA (y2 Xmm (x64_pshufd y 0xFA)))
(OperandSize.Size32)))
(y2 Xmm (x64_pshufd y
0xFA
(OperandSize.Size32))))
(x64_pmuldq x2 y2))) (x64_pmuldq x2 y2)))
;; Special case for `i16x8.extmul_low_i8x16_s`. ;; Special case for `i16x8.extmul_low_i8x16_s`.
@@ -957,12 +953,8 @@
x)) x))
(swiden_low (and (value_type (multi_lane 32 4)) (swiden_low (and (value_type (multi_lane 32 4))
y))))) y)))))
(let ((x2 Xmm (x64_pshufd x (let ((x2 Xmm (x64_pshufd x 0x50))
0x50 (y2 Xmm (x64_pshufd y 0x50)))
(OperandSize.Size32)))
(y2 Xmm (x64_pshufd y
0x50
(OperandSize.Size32))))
(x64_pmuldq x2 y2))) (x64_pmuldq x2 y2)))
;; Special case for `i16x8.extmul_high_i8x16_u`. ;; Special case for `i16x8.extmul_high_i8x16_u`.
@@ -997,12 +989,8 @@
x)) x))
(uwiden_high (and (value_type (multi_lane 32 4)) (uwiden_high (and (value_type (multi_lane 32 4))
y))))) y)))))
(let ((x2 Xmm (x64_pshufd x (let ((x2 Xmm (x64_pshufd x 0xFA))
0xFA (y2 Xmm (x64_pshufd y 0xFA)))
(OperandSize.Size32)))
(y2 Xmm (x64_pshufd y
0xFA
(OperandSize.Size32))))
(x64_pmuludq x2 y2))) (x64_pmuludq x2 y2)))
;; Special case for `i16x8.extmul_low_i8x16_u`. ;; Special case for `i16x8.extmul_low_i8x16_u`.
@@ -1033,12 +1021,8 @@
x)) x))
(uwiden_low (and (value_type (multi_lane 32 4)) (uwiden_low (and (value_type (multi_lane 32 4))
y))))) y)))))
(let ((x2 Xmm (x64_pshufd x (let ((x2 Xmm (x64_pshufd x 0x50))
0x50 (y2 Xmm (x64_pshufd y 0x50)))
(OperandSize.Size32)))
(y2 Xmm (x64_pshufd y
0x50
(OperandSize.Size32))))
(x64_pmuludq x2 y2))) (x64_pmuludq x2 y2)))
;;;; Rules for `iabs` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;;;; Rules for `iabs` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
@@ -3161,7 +3145,7 @@
(x64_pmovsxwd (x64_palignr x x 8 (OperandSize.Size32))))) (x64_pmovsxwd (x64_palignr x x 8 (OperandSize.Size32)))))
(rule (lower (has_type $I64X2 (swiden_high val @ (value_type $I32X4)))) (rule (lower (has_type $I64X2 (swiden_high val @ (value_type $I32X4))))
(x64_pmovsxdq (x64_pshufd val 0xEE (OperandSize.Size32)))) (x64_pmovsxdq (x64_pshufd val 0xEE)))
;; Rules for `uwiden_low` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;; Rules for `uwiden_low` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
@@ -3185,7 +3169,7 @@
(x64_pmovzxwd (x64_palignr x x 8 (OperandSize.Size32))))) (x64_pmovzxwd (x64_palignr x x 8 (OperandSize.Size32)))))
(rule (lower (has_type $I64X2 (uwiden_high val @ (value_type $I32X4)))) (rule (lower (has_type $I64X2 (uwiden_high val @ (value_type $I32X4))))
(x64_pmovzxdq (x64_pshufd val 0xEE (OperandSize.Size32)))) (x64_pmovzxdq (x64_pshufd val 0xEE)))
;; Rules for `snarrow` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;; Rules for `snarrow` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
@@ -3481,25 +3465,25 @@
;; Cases 2-4 for an F32X4 ;; Cases 2-4 for an F32X4
(rule 1 (lower (has_type $F32 (extractlane val @ (value_type (ty_vec128 ty)) (rule 1 (lower (has_type $F32 (extractlane val @ (value_type (ty_vec128 ty))
(u8_from_uimm8 lane)))) (u8_from_uimm8 lane))))
(x64_pshufd val lane (OperandSize.Size32))) (x64_pshufd val lane))
;; This is the only remaining case for F64X2 ;; This is the only remaining case for F64X2
(rule 1 (lower (has_type $F64 (extractlane val @ (value_type (ty_vec128 ty)) (rule 1 (lower (has_type $F64 (extractlane val @ (value_type (ty_vec128 ty))
(u8_from_uimm8 1)))) (u8_from_uimm8 1))))
;; 0xee == 0b11_10_11_10 ;; 0xee == 0b11_10_11_10
(x64_pshufd val 0xee (OperandSize.Size32))) (x64_pshufd val 0xee))
(rule 0 (lower (extractlane val @ (value_type ty @ (multi_lane 8 16)) (u8_from_uimm8 lane))) (rule 0 (lower (extractlane val @ (value_type ty @ (multi_lane 8 16)) (u8_from_uimm8 lane)))
(x64_pextrb ty val lane)) (x64_pextrb val lane))
(rule 0 (lower (extractlane val @ (value_type ty @ (multi_lane 16 8)) (u8_from_uimm8 lane))) (rule 0 (lower (extractlane val @ (value_type ty @ (multi_lane 16 8)) (u8_from_uimm8 lane)))
(x64_pextrw ty val lane)) (x64_pextrw val lane))
(rule 0 (lower (extractlane val @ (value_type ty @ (multi_lane 32 4)) (u8_from_uimm8 lane))) (rule 0 (lower (extractlane val @ (value_type ty @ (multi_lane 32 4)) (u8_from_uimm8 lane)))
(x64_pextrd ty val lane)) (x64_pextrd val lane))
(rule 0 (lower (extractlane val @ (value_type ty @ (multi_lane 64 2)) (u8_from_uimm8 lane))) (rule 0 (lower (extractlane val @ (value_type ty @ (multi_lane 64 2)) (u8_from_uimm8 lane)))
(x64_pextrd ty val lane)) (x64_pextrq val lane))
;; Rules for `scalar_to_vector` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;; Rules for `scalar_to_vector` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
@@ -3537,7 +3521,7 @@
(vec Xmm (vec_insert_lane $I16X8 (xmm_uninit_value) src 0)) (vec Xmm (vec_insert_lane $I16X8 (xmm_uninit_value) src 0))
(vec Xmm (vec_insert_lane $I16X8 vec src 1))) (vec Xmm (vec_insert_lane $I16X8 vec src 1)))
;; Shuffle the lowest two lanes to all other lanes. ;; Shuffle the lowest two lanes to all other lanes.
(x64_pshufd vec 0 (OperandSize.Size32)))) (x64_pshufd vec 0)))
(rule 1 (lower (has_type (multi_lane 32 4) (splat src @ (value_type (ty_scalar_float _))))) (rule 1 (lower (has_type (multi_lane 32 4) (splat src @ (value_type (ty_scalar_float _)))))
(lower_splat_32x4 $F32X4 src)) (lower_splat_32x4 $F32X4 src))
@@ -3550,7 +3534,7 @@
(let ((src RegMem src) (let ((src RegMem src)
(vec Xmm (vec_insert_lane ty (xmm_uninit_value) src 0))) (vec Xmm (vec_insert_lane ty (xmm_uninit_value) src 0)))
;; Shuffle the lowest lane to all other lanes. ;; Shuffle the lowest lane to all other lanes.
(x64_pshufd vec 0 (OperandSize.Size32)))) (x64_pshufd vec 0)))
(rule 1 (lower (has_type (multi_lane 64 2) (splat src @ (value_type (ty_scalar_float _))))) (rule 1 (lower (has_type (multi_lane 64 2) (splat src @ (value_type (ty_scalar_float _)))))
(lower_splat_64x2 $F64X2 src)) (lower_splat_64x2 $F64X2 src))

View File

@@ -86,7 +86,7 @@ block0(v0: i64x2):
; pushq %rbp ; pushq %rbp
; movq %rsp, %rbp ; movq %rsp, %rbp
; block0: ; block0:
; pextrd.w $1, %xmm0, %rax ; pextrq $1, %xmm0, %rax
; movq %rbp, %rsp ; movq %rbp, %rsp
; popq %rbp ; popq %rbp
; ret ; ret

View File

@@ -753,8 +753,8 @@ block0(v0: i64x2):
; pushq %rbp ; pushq %rbp
; movq %rsp, %rbp ; movq %rsp, %rbp
; block0: ; block0:
; pextrd.w $0, %xmm0, %rdx ; pextrq $0, %xmm0, %rdx
; pextrd.w $1, %xmm0, %r9 ; pextrq $1, %xmm0, %r9
; sarq $36, %rdx, %rdx ; sarq $36, %rdx, %rdx
; sarq $36, %r9, %r9 ; sarq $36, %r9, %r9
; uninit %xmm0 ; uninit %xmm0
@@ -789,8 +789,8 @@ block0(v0: i64x2, v1: i32):
; pushq %rbp ; pushq %rbp
; movq %rsp, %rbp ; movq %rsp, %rbp
; block0: ; block0:
; pextrd.w $0, %xmm0, %r8 ; pextrq $0, %xmm0, %r8
; pextrd.w $1, %xmm0, %r10 ; pextrq $1, %xmm0, %r10
; movq %rdi, %rcx ; movq %rdi, %rcx
; sarq %cl, %r8, %r8 ; sarq %cl, %r8, %r8
; sarq %cl, %r10, %r10 ; sarq %cl, %r10, %r10