x64: Refactor and fill out some gpr-vs-xmm bits (#6058)
* x64: Add instruction helpers for `mov{d,q}`
These will soon grow AVX equivalents, so move them into instruction helpers that can gain AVX clauses in the future (a condensed sketch of the resulting helper pattern appears after this list of changes).
* x64: Don't auto-convert between RegMemImm and XmmMemImm
The previous conversion, `mov_rmi_to_xmm`, would move from GPR registers to XMM registers, which isn't what most of the other `convert` statements between these newtypes do. That seemed like a possible footgun, so I've removed the auto-conversion and added an explicit helper to go from a `u32` to an `XmmMemImm`.
* x64: Add AVX encodings of some more GPR-related insns
This commit adds more support for AVX instructions where GPRs are mixed in with XMM registers. This required a few more `Inst` variants to handle the new instructions.
* Fix vpmovmskb encoding
* Fix xmm-to-gpr encoding of vmovd/vmovq
* Fix typo
* Fix rebase conflict
* Fix rebase conflict with tests
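For context, every helper touched or added here follows the same ISLE shape: a base rule emits the SSE instruction, and a priority-1 rule overrides it with the VEX-encoded AVX form when `use_avx_simd` is enabled. A condensed sketch of that pattern (restating the `x64_movd_to_xmm` helper from the diff below, not a new definition):

;; Base case: SSE `movd` from a GPR (or memory) into an XMM register.
(decl x64_movd_to_xmm (GprMem) Xmm)
(rule (x64_movd_to_xmm from)
      (gpr_to_xmm (SseOpcode.Movd) from (OperandSize.Size32)))

;; Higher-priority case: AVX `vmovd`, selected whenever AVX SIMD is available.
(rule 1 (x64_movd_to_xmm from)
      (if-let $true (use_avx_simd))
      (gpr_to_xmm_vex (AvxOpcode.Vmovd) from (OperandSize.Size32)))

Lowering rules then call the helper and never choose between encodings themselves; the same shape is applied to `movq`, `movmskps`, `movmskpd`, `pmovmskb`, and `cvtsi2s{s,d}` in the diff.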
@@ -320,6 +320,21 @@
(dst WritableGpr)
(imm u8))

;; XMM (scalar) unary op (from integer to float reg): vmovd, vmovq,
;; vcvtsi2s{s,d}
(GprToXmmVex (op AvxOpcode)
(src GprMem)
(dst WritableXmm)
(src_size OperandSize))

;; XMM (scalar) unary op (from xmm to integer reg): vmovd, vmovq,
;; vcvtts{s,d}2si
(XmmToGprVex (op AvxOpcode)
(src Xmm)
(dst WritableGpr)
(dst_size OperandSize))


;; XMM (scalar or vector) binary op that relies on the EVEX
;; prefix. Takes two inputs.
(XmmRmREvex (op Avx512Opcode)
@@ -1277,6 +1292,13 @@
Vpbroadcastw
Vpbroadcastd
Vbroadcastss
Vmovd
Vmovq
Vmovmskps
Vmovmskpd
Vpmovmskb
Vcvtsi2ss
Vcvtsi2sd
))

(type Avx512Opcode extern
@@ -1539,6 +1561,10 @@
(decl lo_gpr (Value) Gpr)
(rule (lo_gpr regs) (gpr_new (lo_reg regs)))

;; Construct a new `XmmMemImm` from a 32-bit immediate.
(decl xmi_imm (u32) XmmMemImm)
(extern constructor xmi_imm xmi_imm)

;;;; Helpers for Working With Integer Comparison Codes ;;;;;;;;;;;;;;;;;;;;;;;;;
;;
@@ -1818,10 +1844,7 @@
(decl mov_rmi_to_xmm (RegMemImm) XmmMemImm)
(rule (mov_rmi_to_xmm rmi @ (RegMemImm.Mem _)) (xmm_mem_imm_new rmi))
(rule (mov_rmi_to_xmm rmi @ (RegMemImm.Imm _)) (xmm_mem_imm_new rmi))
(rule (mov_rmi_to_xmm (RegMemImm.Reg r))
(gpr_to_xmm (SseOpcode.Movd)
r
(OperandSize.Size32)))
(rule (mov_rmi_to_xmm (RegMemImm.Reg r)) (x64_movd_to_xmm r))

;;;; Helpers for Emitting Calls ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

@@ -1941,9 +1964,37 @@
(if-let $true (use_avx_simd))
(xmm_movrm_vex (AvxOpcode.Vmovupd) addr data))

(decl x64_movd (Xmm) Gpr)
(rule (x64_movd from)
;; Helper for creating `movd` instructions.
(decl x64_movd_to_gpr (Xmm) Gpr)
(rule (x64_movd_to_gpr from)
(xmm_to_gpr (SseOpcode.Movd) from (OperandSize.Size32)))
(rule 1 (x64_movd_to_gpr from)
(if-let $true (use_avx_simd))
(xmm_to_gpr_vex (AvxOpcode.Vmovd) from (OperandSize.Size32)))

;; Helper for creating `movd` instructions.
(decl x64_movd_to_xmm (GprMem) Xmm)
(rule (x64_movd_to_xmm from)
(gpr_to_xmm (SseOpcode.Movd) from (OperandSize.Size32)))
(rule 1 (x64_movd_to_xmm from)
(if-let $true (use_avx_simd))
(gpr_to_xmm_vex (AvxOpcode.Vmovd) from (OperandSize.Size32)))

;; Helper for creating `movq` instructions.
(decl x64_movq_to_xmm (GprMem) Xmm)
(rule (x64_movq_to_xmm src)
(gpr_to_xmm (SseOpcode.Movq) src (OperandSize.Size64)))
(rule 1 (x64_movq_to_xmm from)
(if-let $true (use_avx_simd))
(gpr_to_xmm_vex (AvxOpcode.Vmovq) from (OperandSize.Size64)))

;; Helper for creating `movq` instructions.
(decl x64_movq_to_gpr (Xmm) Gpr)
(rule (x64_movq_to_gpr src)
(xmm_to_gpr (SseOpcode.Movq) src (OperandSize.Size64)))
(rule 1 (x64_movq_to_gpr from)
(if-let $true (use_avx_simd))
(xmm_to_gpr_vex (AvxOpcode.Vmovq) from (OperandSize.Size64)))

(decl x64_movdqu_load (XmmMem) Xmm)
(rule (x64_movdqu_load from)
@@ -2186,15 +2237,11 @@

;; `f32` immediates.
(rule 2 (imm $F32 (u64_nonzero bits))
(gpr_to_xmm (SseOpcode.Movd)
(imm $I32 bits)
(OperandSize.Size32)))
(x64_movd_to_xmm (imm $I32 bits)))

;; `f64` immediates.
(rule 2 (imm $F64 (u64_nonzero bits))
(gpr_to_xmm (SseOpcode.Movq)
(imm $I64 bits)
(OperandSize.Size64)))
(x64_movq_to_xmm (imm $I64 bits)))

;; Special case for when a 64-bit immediate fits into 32-bits. We can use a
;; 32-bit move that zero-extends the value, which has a smaller encoding.
@@ -3663,20 +3710,44 @@
(_ Unit (emit (MInst.XmmToGprImmVex op src dst imm))))
dst))

;; Helper for creating `MInst.XmmToGprVex` instructions.
(decl xmm_to_gpr_vex (AvxOpcode Xmm OperandSize) Gpr)
(rule (xmm_to_gpr_vex op src size)
(let ((dst WritableGpr (temp_writable_gpr))
(_ Unit (emit (MInst.XmmToGprVex op src dst size))))
dst))

;; Helper for creating `MInst.GprToXmmVex` instructions.
(decl gpr_to_xmm_vex (AvxOpcode GprMem OperandSize) Xmm)
(rule (gpr_to_xmm_vex op src size)
(let ((dst WritableXmm (temp_writable_xmm))
(_ Unit (emit (MInst.GprToXmmVex op src dst size))))
dst))


;; Helper for creating `pmovmskb` instructions.
(decl x64_pmovmskb (OperandSize Xmm) Gpr)
(rule (x64_pmovmskb size src)
(xmm_to_gpr (SseOpcode.Pmovmskb) src size))
(rule 1 (x64_pmovmskb size src)
(if-let $true (use_avx_simd))
(xmm_to_gpr_vex (AvxOpcode.Vpmovmskb) src size))

;; Helper for creating `movmskps` instructions.
(decl x64_movmskps (OperandSize Xmm) Gpr)
(rule (x64_movmskps size src)
(xmm_to_gpr (SseOpcode.Movmskps) src size))
(rule 1 (x64_movmskps size src)
(if-let $true (use_avx_simd))
(xmm_to_gpr_vex (AvxOpcode.Vmovmskps) src size))

;; Helper for creating `movmskpd` instructions.
(decl x64_movmskpd (OperandSize Xmm) Gpr)
(rule (x64_movmskpd size src)
(xmm_to_gpr (SseOpcode.Movmskpd) src size))
(rule 1 (x64_movmskpd size src)
(if-let $true (use_avx_simd))
(xmm_to_gpr_vex (AvxOpcode.Vmovmskpd) src size))

;; Helper for creating `MInst.GprToXmm` instructions.
(decl gpr_to_xmm (SseOpcode GprMem OperandSize) Xmm)
@@ -3973,11 +4044,17 @@
(decl x64_cvtsi2ss (Type GprMem) Xmm)
(rule (x64_cvtsi2ss ty x)
(gpr_to_xmm (SseOpcode.Cvtsi2ss) x (raw_operand_size_of_type ty)))
(rule 1 (x64_cvtsi2ss ty x)
(if-let $true (use_avx_simd))
(gpr_to_xmm_vex (AvxOpcode.Vcvtsi2ss) x (raw_operand_size_of_type ty)))

;; Helper for creating `cvtsi2sd` instructions.
(decl x64_cvtsi2sd (Type GprMem) Xmm)
(rule (x64_cvtsi2sd ty x)
(gpr_to_xmm (SseOpcode.Cvtsi2sd) x (raw_operand_size_of_type ty)))
(rule 1 (x64_cvtsi2sd ty x)
(if-let $true (use_avx_simd))
(gpr_to_xmm_vex (AvxOpcode.Vcvtsi2sd) x (raw_operand_size_of_type ty)))

;; Helper for creating `cvttps2dq` instructions.
(decl x64_cvttps2dq (XmmMem) Xmm)
@@ -4486,15 +4563,15 @@

(decl bitcast_xmm_to_gpr (Type Xmm) Gpr)
(rule (bitcast_xmm_to_gpr $F32 src)
(xmm_to_gpr (SseOpcode.Movd) src (OperandSize.Size32)))
(x64_movd_to_gpr src))
(rule (bitcast_xmm_to_gpr $F64 src)
(xmm_to_gpr (SseOpcode.Movq) src (OperandSize.Size64)))
(x64_movq_to_gpr src))

(decl bitcast_gpr_to_xmm (Type Gpr) Xmm)
(rule (bitcast_gpr_to_xmm $I32 src)
(gpr_to_xmm (SseOpcode.Movd) src (OperandSize.Size32)))
(x64_movd_to_xmm src))
(rule (bitcast_gpr_to_xmm $I64 src)
(gpr_to_xmm (SseOpcode.Movq) src (OperandSize.Size64)))
(x64_movq_to_xmm src))

;;;; Stack Addresses ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

@@ -4678,7 +4755,6 @@
(convert Reg XmmMem reg_to_xmm_mem)
(convert Reg RegMemImm reg_to_reg_mem_imm)
(convert RegMem XmmMem reg_mem_to_xmm_mem)
(convert RegMemImm XmmMemImm mov_rmi_to_xmm)
(convert Xmm XmmMem xmm_to_xmm_mem)
(convert Xmm XmmMemImm xmm_to_xmm_mem_imm)
(convert Xmm XmmMemAligned xmm_to_xmm_mem_aligned)
@@ -1715,7 +1715,14 @@ impl AvxOpcode {
| AvxOpcode::Vpextrq
| AvxOpcode::Vpblendw
| AvxOpcode::Vmovddup
| AvxOpcode::Vbroadcastss => {
| AvxOpcode::Vbroadcastss
| AvxOpcode::Vmovd
| AvxOpcode::Vmovq
| AvxOpcode::Vmovmskps
| AvxOpcode::Vmovmskpd
| AvxOpcode::Vpmovmskb
| AvxOpcode::Vcvtsi2ss
| AvxOpcode::Vcvtsi2sd => {
smallvec![InstructionSet::AVX]
}
@@ -2515,6 +2515,89 @@ pub(crate) fn emit(
.encode(sink);
}

Inst::XmmToGprVex {
op,
src,
dst,
dst_size,
} => {
let src = allocs.next(src.to_reg());
let dst = allocs.next(dst.to_reg().to_reg());

let (prefix, map, opcode) = match op {
// vmovd/vmovq are differentiated by `w`
AvxOpcode::Vmovd | AvxOpcode::Vmovq => (LegacyPrefixes::_66, OpcodeMap::_0F, 0x7E),
AvxOpcode::Vmovmskps => (LegacyPrefixes::None, OpcodeMap::_0F, 0x50),
AvxOpcode::Vmovmskpd => (LegacyPrefixes::_66, OpcodeMap::_0F, 0x50),
AvxOpcode::Vpmovmskb => (LegacyPrefixes::_66, OpcodeMap::_0F, 0xD7),
_ => unimplemented!("Opcode {:?} not implemented", op),
};
let w = match dst_size {
OperandSize::Size64 => true,
_ => false,
};
let mut vex = VexInstruction::new()
.length(VexVectorLength::V128)
.w(w)
.prefix(prefix)
.map(map)
.opcode(opcode);
vex = match op {
// The `vmovq/vmovd` reverse the order of the destination/source
// relative to other opcodes using this shape of instruction.
AvxOpcode::Vmovd | AvxOpcode::Vmovq => vex
.rm(dst.to_real_reg().unwrap().hw_enc())
.reg(src.to_real_reg().unwrap().hw_enc()),
_ => vex
.rm(src.to_real_reg().unwrap().hw_enc())
.reg(dst.to_real_reg().unwrap().hw_enc()),
};
vex.encode(sink);
}

Inst::GprToXmmVex {
op,
src,
dst,
src_size,
} => {
let dst = allocs.next(dst.to_reg().to_reg());
let src = match src.clone().to_reg_mem().with_allocs(allocs) {
RegMem::Reg { reg } => {
RegisterOrAmode::Register(reg.to_real_reg().unwrap().hw_enc().into())
}
RegMem::Mem { addr } => RegisterOrAmode::Amode(addr.finalize(state, sink)),
};

let (prefix, map, opcode) = match op {
// vmovd/vmovq are differentiated by `w`
AvxOpcode::Vmovd | AvxOpcode::Vmovq => (LegacyPrefixes::_66, OpcodeMap::_0F, 0x6E),
AvxOpcode::Vcvtsi2ss => (LegacyPrefixes::_F3, OpcodeMap::_0F, 0x2A),
AvxOpcode::Vcvtsi2sd => (LegacyPrefixes::_F2, OpcodeMap::_0F, 0x2A),
_ => unimplemented!("Opcode {:?} not implemented", op),
};
let w = match src_size {
OperandSize::Size64 => true,
_ => false,
};
let mut insn = VexInstruction::new()
.length(VexVectorLength::V128)
.w(w)
.prefix(prefix)
.map(map)
.opcode(opcode)
.rm(src)
.reg(dst.to_real_reg().unwrap().hw_enc());
// These opcodes technically take a second operand which is the
// upper bits to preserve during the float conversion. We don't
// actually use this in this backend right now so reuse the
// destination register. This at least matches what LLVM does.
if let AvxOpcode::Vcvtsi2ss | AvxOpcode::Vcvtsi2sd = op {
insn = insn.vvvv(dst.to_real_reg().unwrap().hw_enc());
}
insn.encode(sink);
}

Inst::XmmRmREvex {
op,
src1,
@@ -158,7 +158,9 @@ impl Inst {
| Inst::XmmUnaryRmRImmVex { op, .. }
| Inst::XmmMovRMVex { op, .. }
| Inst::XmmMovRMImmVex { op, .. }
| Inst::XmmToGprImmVex { op, .. } => op.available_from(),
| Inst::XmmToGprImmVex { op, .. }
| Inst::XmmToGprVex { op, .. }
| Inst::GprToXmmVex { op, .. } => op.available_from(),
}
}
}
@@ -1202,6 +1204,18 @@ impl PrettyPrint for Inst {
format!("{} {}, {}", ljustify(op.to_string()), src, dst)
}

Inst::XmmToGprVex {
op,
src,
dst,
dst_size,
} => {
let dst_size = dst_size.to_bytes();
let src = pretty_print_reg(src.to_reg(), 8, allocs);
let dst = pretty_print_reg(dst.to_reg().to_reg(), dst_size, allocs);
format!("{} {src}, {dst}", ljustify(op.to_string()))
}

Inst::XmmToGprImm { op, src, dst, imm } => {
let src = pretty_print_reg(src.to_reg(), 8, allocs);
let dst = pretty_print_reg(dst.to_reg().to_reg(), 8, allocs);
@@ -1225,6 +1239,17 @@ impl PrettyPrint for Inst {
format!("{} {}, {}", ljustify(op.to_string()), src, dst)
}

Inst::GprToXmmVex {
op,
src,
src_size,
dst,
} => {
let dst = pretty_print_reg(dst.to_reg().to_reg(), 8, allocs);
let src = src.pretty_print(src_size.to_bytes(), allocs);
format!("{} {src}, {dst}", ljustify(op.to_string()))
}

Inst::XmmCmpRmR { op, src, dst } => {
let dst = pretty_print_reg(dst.to_reg(), 8, allocs);
let src = src.pretty_print(8, allocs);
@@ -2082,12 +2107,13 @@ fn x64_get_operands<F: Fn(VReg) -> VReg>(inst: &Inst, collector: &mut OperandCol
collector.reg_fixed_nonallocatable(*dst);
}
Inst::XmmToGpr { src, dst, .. }
| Inst::XmmToGprVex { src, dst, .. }
| Inst::XmmToGprImm { src, dst, .. }
| Inst::XmmToGprImmVex { src, dst, .. } => {
collector.reg_use(src.to_reg());
collector.reg_def(dst.to_writable_reg());
}
Inst::GprToXmm { src, dst, .. } => {
Inst::GprToXmm { src, dst, .. } | Inst::GprToXmmVex { src, dst, .. } => {
collector.reg_def(dst.to_writable_reg());
src.get_operands(collector);
}
@@ -883,17 +883,17 @@
(let ((a0 Xmm a)
(b0 Xmm b)
;; a_hi = A >> 32
(a_hi Xmm (x64_psrlq a0 (RegMemImm.Imm 32)))
(a_hi Xmm (x64_psrlq a0 (xmi_imm 32)))
;; ah_bl = Ah * Bl
(ah_bl Xmm (x64_pmuludq a_hi b0))
;; b_hi = B >> 32
(b_hi Xmm (x64_psrlq b0 (RegMemImm.Imm 32)))
(b_hi Xmm (x64_psrlq b0 (xmi_imm 32)))
;; al_bh = Al * Bh
(al_bh Xmm (x64_pmuludq a0 b_hi))
;; aa_bb = ah_bl + al_bh
(aa_bb Xmm (x64_paddq ah_bl al_bh))
;; aa_bb_shifted = aa_bb << 32
(aa_bb_shifted Xmm (x64_psllq aa_bb (RegMemImm.Imm 32)))
(aa_bb_shifted Xmm (x64_psllq aa_bb (xmi_imm 32)))
;; al_bl = Al * Bl
(al_bl Xmm (x64_pmuludq a0 b0)))
;; al_bl + aa_bb_shifted
@@ -1087,14 +1087,12 @@
;; Special case for `f32x4.abs`.
(rule (lower (has_type $F32X4 (fabs x)))
(x64_andps x
(x64_psrld (vector_all_ones)
(RegMemImm.Imm 1))))
(x64_psrld (vector_all_ones) (xmi_imm 1))))

;; Special case for `f64x2.abs`.
(rule (lower (has_type $F64X2 (fabs x)))
(x64_andpd x
(x64_psrlq (vector_all_ones)
(RegMemImm.Imm 1))))
(x64_psrlq (vector_all_ones) (xmi_imm 1))))

;;;; Rules for `fneg` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

@@ -1106,13 +1104,11 @@

(rule (lower (has_type $F32X4 (fneg x)))
(x64_xorps x
(x64_pslld (vector_all_ones)
(RegMemImm.Imm 31))))
(x64_pslld (vector_all_ones) (xmi_imm 31))))

(rule (lower (has_type $F64X2 (fneg x)))
(x64_xorpd x
(x64_psllq (vector_all_ones)
(RegMemImm.Imm 63))))
(x64_psllq (vector_all_ones) (xmi_imm 63))))

;;;; Rules for `bmask` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

@@ -1918,7 +1914,7 @@
;; Note that this is a 16x8 shift, but that's OK; we mask
;; off anything that traverses from one byte to the next
;; with the low_mask below.
(shifted_src Xmm (x64_psrlw src (RegMemImm.Imm 4)))
(shifted_src Xmm (x64_psrlw src (xmi_imm 4)))
(high_nibbles Xmm (sse_and $I8X16 shifted_src low_mask))
(lookup Xmm (x64_xmm_load_const $I8X16 (popcount_4bit_table)))
(bit_counts_low Xmm (x64_pshufb lookup low_nibbles))
@@ -2237,7 +2233,7 @@
;; All-ones for NaN, shifted down to leave 10 top bits (1
;; sign, 8 exponent, 1 QNaN bit that must remain set)
;; cleared.
(nan_fraction_mask Xmm (x64_psrld is_nan_mask (RegMemImm.Imm 10)))
(nan_fraction_mask Xmm (x64_psrld is_nan_mask (xmi_imm 10)))
;; Do a NAND, so that we retain every bit not set in
;; `nan_fraction_mask`. This mask will be all zeroes (so
;; we retain every bit) in non-NaN cases, and will have
@@ -2254,7 +2250,7 @@
(min_or Xmm (x64_orpd min1 min2))
(is_nan_mask Xmm (x64_cmppd min1 min2 (FcmpImm.Unordered)))
(min_or_2 Xmm (x64_orpd min_or is_nan_mask))
(nan_fraction_mask Xmm (x64_psrlq is_nan_mask (RegMemImm.Imm 13)))
(nan_fraction_mask Xmm (x64_psrlq is_nan_mask (xmi_imm 13)))
(final Xmm (x64_andnpd nan_fraction_mask min_or_2)))
final))
@@ -2302,7 +2298,7 @@
;; All-ones for NaN, shifted down to leave 10 top bits (1
;; sign, 8 exponent, 1 QNaN bit that must remain set)
;; cleared.
(nan_fraction_mask Xmm (x64_psrld is_nan_mask (RegMemImm.Imm 10)))
(nan_fraction_mask Xmm (x64_psrld is_nan_mask (xmi_imm 10)))
;; Do a NAND, so that we retain every bit not set in
;; `nan_fraction_mask`. This mask will be all zeroes (so
;; we retain every bit) in non-NaN cases, and will have
@@ -2346,7 +2342,7 @@
;; All-ones for NaN, shifted down to leave 13 top bits (1
;; sign, 11 exponent, 1 QNaN bit that must remain set)
;; cleared.
(nan_fraction_mask Xmm (x64_psrlq is_nan_mask (RegMemImm.Imm 13)))
(nan_fraction_mask Xmm (x64_psrlq is_nan_mask (xmi_imm 13)))
;; Do a NAND, so that we retain every bit not set in
;; `nan_fraction_mask`. This mask will be all zeroes (so
;; we retain every bit) in non-NaN cases, and will have
@@ -3011,8 +3007,8 @@
(let ((a Xmm val)

;; get the low 16 bits
(a_lo Xmm (x64_pslld a (RegMemImm.Imm 16)))
(a_lo Xmm (x64_psrld a_lo (RegMemImm.Imm 16)))
(a_lo Xmm (x64_pslld a (xmi_imm 16)))
(a_lo Xmm (x64_psrld a_lo (xmi_imm 16)))

;; get the high 16 bits
(a_hi Xmm (x64_psubd a a_lo))
@@ -3022,7 +3018,7 @@

;; shift the high bits by 1, convert, and double to get the correct
;; value
(a_hi Xmm (x64_psrld a_hi (RegMemImm.Imm 1)))
(a_hi Xmm (x64_psrld a_hi (xmi_imm 1)))
(a_hi Xmm (x64_cvtdq2ps a_hi))
(a_hi Xmm (x64_addps a_hi a_hi)))

@@ -3060,7 +3056,7 @@

;; Set top bit only if < 0
(tmp Xmm (x64_pand dst tmp))
(tmp Xmm (x64_psrad tmp (RegMemImm.Imm 31))))
(tmp Xmm (x64_psrad tmp (xmi_imm 31))))

;; On overflow 0x80000000 is returned to a lane.
;; Below sets positive overflow lanes to 0x7FFFFFFF
@@ -3130,7 +3126,7 @@
;; integer that it can represent. In the case of INT_MAX, this value gets
;; represented as 0x4f000000 which is the integer value (INT_MAX+1).
(tmp2 Xmm (x64_pcmpeqd tmp2 tmp2))
(tmp2 Xmm (x64_psrld tmp2 (RegMemImm.Imm 1)))
(tmp2 Xmm (x64_psrld tmp2 (xmi_imm 1)))
(tmp2 Xmm (x64_cvtdq2ps tmp2))

;; Make a copy of these lanes and then do the first conversion.
@@ -1038,6 +1038,10 @@ impl Context for IsleContext<'_, '_, MInst, X64Backend> {
| bit(h, 7)?,
)
}

fn xmi_imm(&mut self, imm: u32) -> XmmMemImm {
XmmMemImm::new(RegMemImm::imm(imm)).unwrap()
}
}

impl IsleContext<'_, '_, MInst, X64Backend> {
cranelift/filetests/filetests/isa/x64/fcvt-avx.clif (new file, 104 lines)
@@ -0,0 +1,104 @@
test compile precise-output
set enable_simd
target x86_64 has_avx

function %f3(i32) -> f32 {
block0(v0: i32):
v1 = fcvt_from_sint.f32 v0
return v1
}

; VCode:
; pushq %rbp
; movq %rsp, %rbp
; block0:
; vcvtsi2ss %edi, %xmm0
; movq %rbp, %rsp
; popq %rbp
; ret
;
; Disassembled:
; block0: ; offset 0x0
; pushq %rbp
; movq %rsp, %rbp
; block1: ; offset 0x4
; vcvtsi2ssl %edi, %xmm0, %xmm0
; movq %rbp, %rsp
; popq %rbp
; retq

function %f4(i64) -> f32 {
block0(v0: i64):
v1 = fcvt_from_sint.f32 v0
return v1
}

; VCode:
; pushq %rbp
; movq %rsp, %rbp
; block0:
; vcvtsi2ss %rdi, %xmm0
; movq %rbp, %rsp
; popq %rbp
; ret
;
; Disassembled:
; block0: ; offset 0x0
; pushq %rbp
; movq %rsp, %rbp
; block1: ; offset 0x4
; vcvtsi2ssq %rdi, %xmm0, %xmm0
; movq %rbp, %rsp
; popq %rbp
; retq

function %f7(i32) -> f64 {
block0(v0: i32):
v1 = fcvt_from_sint.f64 v0
return v1
}

; VCode:
; pushq %rbp
; movq %rsp, %rbp
; block0:
; vcvtsi2sd %edi, %xmm0
; movq %rbp, %rsp
; popq %rbp
; ret
;
; Disassembled:
; block0: ; offset 0x0
; pushq %rbp
; movq %rsp, %rbp
; block1: ; offset 0x4
; vcvtsi2sdl %edi, %xmm0, %xmm0
; movq %rbp, %rsp
; popq %rbp
; retq

function %f8(i64) -> f64 {
block0(v0: i64):
v1 = fcvt_from_sint.f64 v0
return v1
}

; VCode:
; pushq %rbp
; movq %rsp, %rbp
; block0:
; vcvtsi2sd %rdi, %xmm0
; movq %rbp, %rsp
; popq %rbp
; ret
;
; Disassembled:
; block0: ; offset 0x0
; pushq %rbp
; movq %rsp, %rbp
; block1: ; offset 0x4
; vcvtsi2sdq %rdi, %xmm0, %xmm0
; movq %rbp, %rsp
; popq %rbp
; retq
cranelift/filetests/filetests/isa/x64/float-bitcast-avx.clif (new file, 104 lines)
@@ -0,0 +1,104 @@
test compile precise-output
set enable_simd
target x86_64 has_avx

function %i32_to_f32(i32) -> f32 {
block0(v0: i32):
v1 = bitcast.f32 v0
return v1
}

; VCode:
; pushq %rbp
; movq %rsp, %rbp
; block0:
; vmovd %edi, %xmm0
; movq %rbp, %rsp
; popq %rbp
; ret
;
; Disassembled:
; block0: ; offset 0x0
; pushq %rbp
; movq %rsp, %rbp
; block1: ; offset 0x4
; vmovd %edi, %xmm0
; movq %rbp, %rsp
; popq %rbp
; retq

function %i64_to_f64(i64) -> f64 {
block0(v0: i64):
v1 = bitcast.f64 v0
return v1
}

; VCode:
; pushq %rbp
; movq %rsp, %rbp
; block0:
; vmovq %rdi, %xmm0
; movq %rbp, %rsp
; popq %rbp
; ret
;
; Disassembled:
; block0: ; offset 0x0
; pushq %rbp
; movq %rsp, %rbp
; block1: ; offset 0x4
; vmovq %rdi, %xmm0
; movq %rbp, %rsp
; popq %rbp
; retq

function %f32_to_i32(f32) -> i32 {
block0(v0: f32):
v1 = bitcast.i32 v0
return v1
}

; VCode:
; pushq %rbp
; movq %rsp, %rbp
; block0:
; vmovd %xmm0, %eax
; movq %rbp, %rsp
; popq %rbp
; ret
;
; Disassembled:
; block0: ; offset 0x0
; pushq %rbp
; movq %rsp, %rbp
; block1: ; offset 0x4
; vmovd %xmm0, %eax
; movq %rbp, %rsp
; popq %rbp
; retq

function %f64_to_i64(f64) -> i64 {
block0(v0: f64):
v1 = bitcast.i64 v0
return v1
}

; VCode:
; pushq %rbp
; movq %rsp, %rbp
; block0:
; vmovq %xmm0, %rax
; movq %rbp, %rsp
; popq %rbp
; ret
;
; Disassembled:
; block0: ; offset 0x0
; pushq %rbp
; movq %rsp, %rbp
; block1: ; offset 0x4
; vmovq %xmm0, %rax
; movq %rbp, %rsp
; popq %rbp
; retq
cranelift/filetests/filetests/isa/x64/float-bitcast.clif (new file, 104 lines)
@@ -0,0 +1,104 @@
test compile precise-output
set enable_simd
target x86_64

function %i32_to_f32(i32) -> f32 {
block0(v0: i32):
v1 = bitcast.f32 v0
return v1
}

; VCode:
; pushq %rbp
; movq %rsp, %rbp
; block0:
; movd %edi, %xmm0
; movq %rbp, %rsp
; popq %rbp
; ret
;
; Disassembled:
; block0: ; offset 0x0
; pushq %rbp
; movq %rsp, %rbp
; block1: ; offset 0x4
; movd %edi, %xmm0
; movq %rbp, %rsp
; popq %rbp
; retq

function %i64_to_f64(i64) -> f64 {
block0(v0: i64):
v1 = bitcast.f64 v0
return v1
}

; VCode:
; pushq %rbp
; movq %rsp, %rbp
; block0:
; movq %rdi, %xmm0
; movq %rbp, %rsp
; popq %rbp
; ret
;
; Disassembled:
; block0: ; offset 0x0
; pushq %rbp
; movq %rsp, %rbp
; block1: ; offset 0x4
; movq %rdi, %xmm0
; movq %rbp, %rsp
; popq %rbp
; retq

function %f32_to_i32(f32) -> i32 {
block0(v0: f32):
v1 = bitcast.i32 v0
return v1
}

; VCode:
; pushq %rbp
; movq %rsp, %rbp
; block0:
; movd %xmm0, %eax
; movq %rbp, %rsp
; popq %rbp
; ret
;
; Disassembled:
; block0: ; offset 0x0
; pushq %rbp
; movq %rsp, %rbp
; block1: ; offset 0x4
; movd %xmm0, %eax
; movq %rbp, %rsp
; popq %rbp
; retq

function %f64_to_i64(f64) -> i64 {
block0(v0: f64):
v1 = bitcast.i64 v0
return v1
}

; VCode:
; pushq %rbp
; movq %rsp, %rbp
; block0:
; movq %xmm0, %rax
; movq %rbp, %rsp
; popq %rbp
; ret
;
; Disassembled:
; block0: ; offset 0x0
; pushq %rbp
; movq %rsp, %rbp
; block1: ; offset 0x4
; movq %xmm0, %rax
; movq %rbp, %rsp
; popq %rbp
; retq
@@ -920,7 +920,7 @@ block0(v0: i8x16, v1: i32):
; vpunpcklbw %xmm0, %xmm0, %xmm5
; vpunpckhbw %xmm0, %xmm0, %xmm7
; addl %r9d, $8, %r9d
; movd %r9d, %xmm11
; vmovd %r9d, %xmm11
; vpsraw %xmm5, %xmm11, %xmm13
; vpsraw %xmm7, %xmm11, %xmm15
; vpacksswb %xmm13, %xmm15, %xmm0
@@ -938,7 +938,7 @@ block0(v0: i8x16, v1: i32):
; vpunpcklbw %xmm0, %xmm0, %xmm5
; vpunpckhbw %xmm0, %xmm0, %xmm7
; addl $8, %r9d
; movd %r9d, %xmm11
; vmovd %r9d, %xmm11
; vpsraw %xmm11, %xmm5, %xmm13
; vpsraw %xmm11, %xmm7, %xmm15
; vpacksswb %xmm15, %xmm13, %xmm0
@@ -992,7 +992,7 @@ block0(v0: i16x8, v1: i32):
; block0:
; movq %rdi, %rcx
; andq %rcx, $15, %rcx
; movd %ecx, %xmm5
; vmovd %ecx, %xmm5
; vpsraw %xmm0, %xmm5, %xmm0
; movq %rbp, %rsp
; popq %rbp
@@ -1005,7 +1005,7 @@ block0(v0: i16x8, v1: i32):
; block1: ; offset 0x4
; movq %rdi, %rcx
; andq $0xf, %rcx
; movd %ecx, %xmm5
; vmovd %ecx, %xmm5
; vpsraw %xmm5, %xmm0, %xmm0
; movq %rbp, %rsp
; popq %rbp
@@ -1049,7 +1049,7 @@ block0(v0: i32x4, v1: i32):
; block0:
; movq %rdi, %rcx
; andq %rcx, $31, %rcx
; movd %ecx, %xmm5
; vmovd %ecx, %xmm5
; vpsrad %xmm0, %xmm5, %xmm0
; movq %rbp, %rsp
; popq %rbp
@@ -1062,7 +1062,7 @@ block0(v0: i32x4, v1: i32):
; block1: ; offset 0x4
; movq %rdi, %rcx
; andq $0x1f, %rcx
; movd %ecx, %xmm5
; vmovd %ecx, %xmm5
; vpsrad %xmm5, %xmm0, %xmm0
; movq %rbp, %rsp
; popq %rbp
@@ -1315,7 +1315,7 @@ block0(v0: i8):
; pushq %rbp
; movq %rsp, %rbp
; block0:
; movd %edi, %xmm2
; vmovd %edi, %xmm2
; uninit %xmm4
; vpxor %xmm4, %xmm4, %xmm6
; vpshufb %xmm2, %xmm6, %xmm0
@@ -1328,7 +1328,7 @@ block0(v0: i8):
; pushq %rbp
; movq %rsp, %rbp
; block1: ; offset 0x4
; movd %edi, %xmm2
; vmovd %edi, %xmm2
; vpxor %xmm4, %xmm4, %xmm6
; vpshufb %xmm6, %xmm2, %xmm0
; movq %rbp, %rsp
@@ -1389,7 +1389,7 @@ block0(v0: i8x16, v1: i32):
; block0:
; movq %rdi, %r10
; andq %r10, $7, %r10
; movd %r10d, %xmm5
; vmovd %r10d, %xmm5
; vpsllw %xmm0, %xmm5, %xmm7
; lea const(0), %rsi
; shlq $4, %r10, %r10
@@ -1406,7 +1406,7 @@ block0(v0: i8x16, v1: i32):
; block1: ; offset 0x4
; movq %rdi, %r10
; andq $7, %r10
; movd %r10d, %xmm5
; vmovd %r10d, %xmm5
; vpsllw %xmm5, %xmm0, %xmm7
; leaq 0x15(%rip), %rsi
; shlq $4, %r10
@@ -1461,7 +1461,7 @@ block0(v0: i16x8, v1: i32):
; block0:
; movq %rdi, %rcx
; andq %rcx, $15, %rcx
; movd %ecx, %xmm5
; vmovd %ecx, %xmm5
; vpsllw %xmm0, %xmm5, %xmm0
; movq %rbp, %rsp
; popq %rbp
@@ -1474,7 +1474,7 @@ block0(v0: i16x8, v1: i32):
; block1: ; offset 0x4
; movq %rdi, %rcx
; andq $0xf, %rcx
; movd %ecx, %xmm5
; vmovd %ecx, %xmm5
; vpsllw %xmm5, %xmm0, %xmm0
; movq %rbp, %rsp
; popq %rbp
@@ -1518,7 +1518,7 @@ block0(v0: i32x4, v1: i32):
; block0:
; movq %rdi, %rcx
; andq %rcx, $31, %rcx
; movd %ecx, %xmm5
; vmovd %ecx, %xmm5
; vpslld %xmm0, %xmm5, %xmm0
; movq %rbp, %rsp
; popq %rbp
@@ -1531,7 +1531,7 @@ block0(v0: i32x4, v1: i32):
; block1: ; offset 0x4
; movq %rdi, %rcx
; andq $0x1f, %rcx
; movd %ecx, %xmm5
; vmovd %ecx, %xmm5
; vpslld %xmm5, %xmm0, %xmm0
; movq %rbp, %rsp
; popq %rbp
@@ -1575,7 +1575,7 @@ block0(v0: i64x2, v1: i32):
; block0:
; movq %rdi, %rcx
; andq %rcx, $63, %rcx
; movd %ecx, %xmm5
; vmovd %ecx, %xmm5
; vpsllq %xmm0, %xmm5, %xmm0
; movq %rbp, %rsp
; popq %rbp
@@ -1588,7 +1588,7 @@ block0(v0: i64x2, v1: i32):
; block1: ; offset 0x4
; movq %rdi, %rcx
; andq $0x3f, %rcx
; movd %ecx, %xmm5
; vmovd %ecx, %xmm5
; vpsllq %xmm5, %xmm0, %xmm0
; movq %rbp, %rsp
; popq %rbp
@@ -1632,7 +1632,7 @@ block0(v0: i8x16, v1: i32):
; block0:
; movq %rdi, %r10
; andq %r10, $7, %r10
; movd %r10d, %xmm5
; vmovd %r10d, %xmm5
; vpsrlw %xmm0, %xmm5, %xmm7
; lea const(0), %rsi
; shlq $4, %r10, %r10
@@ -1648,7 +1648,7 @@ block0(v0: i8x16, v1: i32):
; block1: ; offset 0x4
; movq %rdi, %r10
; andq $7, %r10
; movd %r10d, %xmm5
; vmovd %r10d, %xmm5
; vpsrlw %xmm5, %xmm0, %xmm7
; leaq 0x15(%rip), %rsi
; shlq $4, %r10
@@ -1713,7 +1713,7 @@ block0(v0: i16x8, v1: i32):
; block0:
; movq %rdi, %rcx
; andq %rcx, $15, %rcx
; movd %ecx, %xmm5
; vmovd %ecx, %xmm5
; vpsrlw %xmm0, %xmm5, %xmm0
; movq %rbp, %rsp
; popq %rbp
@@ -1726,7 +1726,7 @@ block0(v0: i16x8, v1: i32):
; block1: ; offset 0x4
; movq %rdi, %rcx
; andq $0xf, %rcx
; movd %ecx, %xmm5
; vmovd %ecx, %xmm5
; vpsrlw %xmm5, %xmm0, %xmm0
; movq %rbp, %rsp
; popq %rbp
@@ -1770,7 +1770,7 @@ block0(v0: i32x4, v1: i32):
; block0:
; movq %rdi, %rcx
; andq %rcx, $31, %rcx
; movd %ecx, %xmm5
; vmovd %ecx, %xmm5
; vpsrld %xmm0, %xmm5, %xmm0
; movq %rbp, %rsp
; popq %rbp
@@ -1783,7 +1783,7 @@ block0(v0: i32x4, v1: i32):
; block1: ; offset 0x4
; movq %rdi, %rcx
; andq $0x1f, %rcx
; movd %ecx, %xmm5
; vmovd %ecx, %xmm5
; vpsrld %xmm5, %xmm0, %xmm0
; movq %rbp, %rsp
; popq %rbp
@@ -1827,7 +1827,7 @@ block0(v0: i64x2, v1: i32):
; block0:
; movq %rdi, %rcx
; andq %rcx, $63, %rcx
; movd %ecx, %xmm5
; vmovd %ecx, %xmm5
; vpsrlq %xmm0, %xmm5, %xmm0
; movq %rbp, %rsp
; popq %rbp
@@ -1840,7 +1840,7 @@ block0(v0: i64x2, v1: i32):
; block1: ; offset 0x4
; movq %rdi, %rcx
; andq $0x3f, %rcx
; movd %ecx, %xmm5
; vmovd %ecx, %xmm5
; vpsrlq %xmm5, %xmm0, %xmm0
; movq %rbp, %rsp
; popq %rbp
@@ -41,7 +41,7 @@ block0(v0: i64):
; movq %rsp, %rbp
; block0:
; movl $-2147483648, %eax
; movd %eax, %xmm4
; vmovd %eax, %xmm4
; vandnps %xmm4, const(0), %xmm6
; vandps %xmm4, 0(%rdi), %xmm8
; vorps %xmm6, %xmm8, %xmm0
@@ -55,7 +55,7 @@ block0(v0: i64):
; movq %rsp, %rbp
; block1: ; offset 0x4
; movl $0x80000000, %eax
; movd %eax, %xmm4
; vmovd %eax, %xmm4
; vandnps 0x1b(%rip), %xmm4, %xmm6
; vandps (%rdi), %xmm4, %xmm8
; vorps %xmm8, %xmm6, %xmm0
@@ -12,7 +12,7 @@ block0(v0: i8):
; pushq %rbp
; movq %rsp, %rbp
; block0:
; movd %edi, %xmm2
; vmovd %edi, %xmm2
; uninit %xmm4
; vpxor %xmm4, %xmm4, %xmm6
; vpshufb %xmm2, %xmm6, %xmm0
@@ -25,7 +25,7 @@ block0(v0: i8):
; pushq %rbp
; movq %rsp, %rbp
; block1: ; offset 0x4
; movd %edi, %xmm2
; vmovd %edi, %xmm2
; vpxor %xmm4, %xmm4, %xmm6
; vpshufb %xmm6, %xmm2, %xmm0
; movq %rbp, %rsp
@@ -42,7 +42,7 @@ block0(v0: i16):
; pushq %rbp
; movq %rsp, %rbp
; block0:
; movd %edi, %xmm2
; vmovd %edi, %xmm2
; vpshuflw $0, %xmm2, %xmm4
; vpshufd $0, %xmm4, %xmm0
; movq %rbp, %rsp
@@ -54,7 +54,7 @@ block0(v0: i16):
; pushq %rbp
; movq %rsp, %rbp
; block1: ; offset 0x4
; movd %edi, %xmm2
; vmovd %edi, %xmm2
; vpshuflw $0, %xmm2, %xmm4
; vpshufd $0, %xmm4, %xmm0
; movq %rbp, %rsp
@@ -71,7 +71,7 @@ block0(v0: i32):
; pushq %rbp
; movq %rsp, %rbp
; block0:
; movd %edi, %xmm2
; vmovd %edi, %xmm2
; vpshufd $0, %xmm2, %xmm0
; movq %rbp, %rsp
; popq %rbp
@@ -82,7 +82,7 @@ block0(v0: i32):
; pushq %rbp
; movq %rsp, %rbp
; block1: ; offset 0x4
; movd %edi, %xmm2
; vmovd %edi, %xmm2
; vpshufd $0, %xmm2, %xmm0
; movq %rbp, %rsp
; popq %rbp
@@ -98,7 +98,7 @@ block0(v0: i64):
; pushq %rbp
; movq %rsp, %rbp
; block0:
; movq %rdi, %xmm2
; vmovq %rdi, %xmm2
; vmovddup %xmm2, %xmm0
; movq %rbp, %rsp
; popq %rbp
@@ -109,7 +109,7 @@ block0(v0: i64):
; pushq %rbp
; movq %rsp, %rbp
; block1: ; offset 0x4
; movq %rdi, %xmm2
; vmovq %rdi, %xmm2
; vmovddup %xmm2, %xmm0
; movq %rbp, %rsp
; popq %rbp
@@ -12,7 +12,7 @@ block0(v0: i8):
; pushq %rbp
; movq %rsp, %rbp
; block0:
; movd %edi, %xmm2
; vmovd %edi, %xmm2
; vpbroadcastb %xmm2, %xmm0
; movq %rbp, %rsp
; popq %rbp
@@ -23,7 +23,7 @@ block0(v0: i8):
; pushq %rbp
; movq %rsp, %rbp
; block1: ; offset 0x4
; movd %edi, %xmm2
; vmovd %edi, %xmm2
; vpbroadcastb %xmm2, %xmm0
; movq %rbp, %rsp
; popq %rbp
@@ -39,7 +39,7 @@ block0(v0: i16):
; pushq %rbp
; movq %rsp, %rbp
; block0:
; movd %edi, %xmm2
; vmovd %edi, %xmm2
; vpbroadcastw %xmm2, %xmm0
; movq %rbp, %rsp
; popq %rbp
@@ -50,7 +50,7 @@ block0(v0: i16):
; pushq %rbp
; movq %rsp, %rbp
; block1: ; offset 0x4
; movd %edi, %xmm2
; vmovd %edi, %xmm2
; vpbroadcastw %xmm2, %xmm0
; movq %rbp, %rsp
; popq %rbp
@@ -66,7 +66,7 @@ block0(v0: i32):
; pushq %rbp
; movq %rsp, %rbp
; block0:
; movd %edi, %xmm2
; vmovd %edi, %xmm2
; vpbroadcastd %xmm2, %xmm0
; movq %rbp, %rsp
; popq %rbp
@@ -77,7 +77,7 @@ block0(v0: i32):
; pushq %rbp
; movq %rsp, %rbp
; block1: ; offset 0x4
; movd %edi, %xmm2
; vmovd %edi, %xmm2
; vpbroadcastd %xmm2, %xmm0
; movq %rbp, %rsp
; popq %rbp
@@ -93,7 +93,7 @@ block0(v0: i64):
; pushq %rbp
; movq %rsp, %rbp
; block0:
; movq %rdi, %xmm2
; vmovq %rdi, %xmm2
; vmovddup %xmm2, %xmm0
; movq %rbp, %rsp
; popq %rbp
@@ -104,7 +104,7 @@ block0(v0: i64):
; pushq %rbp
; movq %rsp, %rbp
; block1: ; offset 0x4
; movq %rdi, %xmm2
; vmovq %rdi, %xmm2
; vmovddup %xmm2, %xmm0
; movq %rbp, %rsp
; popq %rbp
cranelift/filetests/filetests/isa/x64/vhigh_bits-avx.clif (new file, 108 lines)
@@ -0,0 +1,108 @@
test compile precise-output
set enable_simd
target x86_64 has_avx

function %f1(i8x16) -> i8 {
block0(v0: i8x16):
v1 = vhigh_bits.i8 v0
return v1
}

; VCode:
; pushq %rbp
; movq %rsp, %rbp
; block0:
; vpmovmskb %xmm0, %eax
; movq %rbp, %rsp
; popq %rbp
; ret
;
; Disassembled:
; block0: ; offset 0x0
; pushq %rbp
; movq %rsp, %rbp
; block1: ; offset 0x4
; vpmovmskb %xmm0, %eax
; movq %rbp, %rsp
; popq %rbp
; retq

function %f3(i16x8) -> i8 {
block0(v0: i16x8):
v1 = vhigh_bits.i8 v0
return v1
}

; VCode:
; pushq %rbp
; movq %rsp, %rbp
; block0:
; vpacksswb %xmm0, %xmm0, %xmm2
; vpmovmskb %xmm2, %eax
; shrq $8, %rax, %rax
; movq %rbp, %rsp
; popq %rbp
; ret
;
; Disassembled:
; block0: ; offset 0x0
; pushq %rbp
; movq %rsp, %rbp
; block1: ; offset 0x4
; vpacksswb %xmm0, %xmm0, %xmm2
; vpmovmskb %xmm2, %eax
; shrq $8, %rax
; movq %rbp, %rsp
; popq %rbp
; retq

function %f4(i32x4) -> i8 {
block0(v0: i32x4):
v1 = vhigh_bits.i8 v0
return v1
}

; VCode:
; pushq %rbp
; movq %rsp, %rbp
; block0:
; vmovmskps %xmm0, %eax
; movq %rbp, %rsp
; popq %rbp
; ret
;
; Disassembled:
; block0: ; offset 0x0
; pushq %rbp
; movq %rsp, %rbp
; block1: ; offset 0x4
; vmovmskps %xmm0, %eax
; movq %rbp, %rsp
; popq %rbp
; retq

function %f5(i64x2) -> i8 {
block0(v0: i64x2):
v1 = vhigh_bits.i8 v0
return v1
}

; VCode:
; pushq %rbp
; movq %rsp, %rbp
; block0:
; vmovmskpd %xmm0, %eax
; movq %rbp, %rsp
; popq %rbp
; ret
;
; Disassembled:
; block0: ; offset 0x0
; pushq %rbp
; movq %rsp, %rbp
; block1: ; offset 0x4
; vmovmskpd %xmm0, %eax
; movq %rbp, %rsp
; popq %rbp
; retq