ISLE: port iabs to ISLE for x64
This commit is contained in:
@@ -22,6 +22,9 @@
|
||||
(XmmUnaryRmR (op SseOpcode)
|
||||
(src RegMem)
|
||||
(dst WritableReg))
|
||||
(XmmUnaryRmREvex (op Avx512Opcode)
|
||||
(src RegMem)
|
||||
(dst WritableReg))
|
||||
(XmmRmiReg (opcode SseOpcode)
|
||||
(src1 Reg)
|
||||
(src2 RegMemImm)
|
||||
@@ -347,6 +350,15 @@
|
||||
(decl encode_fcmp_imm (FcmpImm) u8)
|
||||
(extern constructor encode_fcmp_imm encode_fcmp_imm)
|
||||
|
||||
;;;; Helpers for Getting Particular Physical Registers ;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
;;
|
||||
;; These should only be used for legalization purposes, when we can't otherwise
|
||||
;; rely on something like `Inst::mov_mitosis` to put an operand into the
|
||||
;; appropriate physical register for whatever reason.
|
||||
|
||||
(decl xmm0 () WritableReg)
|
||||
(extern constructor xmm0 xmm0)
|
||||
|
||||
;;;; Helpers for Querying Enabled ISA Extensions ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
|
||||
(decl avx512vl_enabled () Type)
|
||||
@@ -355,6 +367,9 @@
|
||||
(decl avx512dq_enabled () Type)
|
||||
(extern extractor avx512dq_enabled avx512dq_enabled)
|
||||
|
||||
(decl avx512f_enabled () Type)
|
||||
(extern extractor avx512f_enabled avx512f_enabled)
|
||||
|
||||
;;;; Helpers for Merging and Sinking Immediates/Loads ;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
|
||||
;; Extract a constant `Imm8Reg.Imm8` from a value operand.
|
||||
@@ -926,6 +941,18 @@
|
||||
(rule (pandn src1 src2)
|
||||
(xmm_rm_r $F64X2 (SseOpcode.Pandn) src1 src2))
|
||||
|
||||
;; Helper for creating `blendvpd` instructions.
;;
;; Takes the two blend operands (`src1`, `src2`) plus a `mask` register. The
;; mask is first copied into `xmm0` with a `movapd`, and then the blend itself
;; is emitted through `xmm_rm_r` using only `src1` and `src2`; the returned
;; `Reg` is whatever `xmm_rm_r` returns.
|
||||
(decl blendvpd (Reg RegMem Reg) Reg)
|
||||
(rule (blendvpd src1 src2 mask)
|
||||
;; Move the mask into `xmm0`, as `blendvpd` implicitly operates on that
|
||||
;; register. (This kind of thing would normally happen inside of
|
||||
;; `Inst::mov_mitosis`, but has to happen here, where we still have the
|
||||
;; mask register, because the mask is implicit and doesn't appear in the
|
||||
;; `Inst` itself.)
|
||||
(let ((mask2 WritableReg (xmm0))
|
||||
(_ Unit (emit (MInst.XmmUnaryRmR (SseOpcode.Movapd) (RegMem.Reg mask) mask2))))
|
||||
(xmm_rm_r $F64X2 (SseOpcode.Blendvpd) src1 src2)))
|
||||
|
||||
;; Helper for creating `MInst.XmmRmRImm` instructions.
|
||||
(decl xmm_rm_r_imm (SseOpcode Reg RegMem u8 OperandSize) Reg)
|
||||
(rule (xmm_rm_r_imm op src1 src2 imm size)
|
||||
@@ -977,6 +1004,33 @@
|
||||
(rule (pmovzxbw src)
|
||||
(xmm_unary_rm_r (SseOpcode.Pmovzxbw) src))
|
||||
|
||||
;; Helper for creating `pabsb` instructions.
|
||||
(decl pabsb (RegMem) Reg)
|
||||
(rule (pabsb src)
|
||||
(xmm_unary_rm_r (SseOpcode.Pabsb) src))
|
||||
|
||||
;; Helper for creating `pabsw` instructions.
|
||||
(decl pabsw (RegMem) Reg)
|
||||
(rule (pabsw src)
|
||||
(xmm_unary_rm_r (SseOpcode.Pabsw) src))
|
||||
|
||||
;; Helper for creating `pabsd` instructions.
|
||||
(decl pabsd (RegMem) Reg)
|
||||
(rule (pabsd src)
|
||||
(xmm_unary_rm_r (SseOpcode.Pabsd) src))
|
||||
|
||||
;; Helper for creating `MInst.XmmUnaryRmREvex` instructions.
;;
;; Allocates a fresh `$I8X16` temporary as the destination, emits the
;; instruction with the given opcode and source, and returns the destination
;; as a plain `Reg`.
|
||||
(decl xmm_unary_rm_r_evex (Avx512Opcode RegMem) Reg)
|
||||
(rule (xmm_unary_rm_r_evex op src)
|
||||
(let ((dst WritableReg (temp_writable_reg $I8X16))
|
||||
(_ Unit (emit (MInst.XmmUnaryRmREvex op src dst))))
|
||||
(writable_reg_to_reg dst)))
|
||||
|
||||
;; Helper for creating `vpabsq` instructions.
|
||||
(decl vpabsq (RegMem) Reg)
|
||||
(rule (vpabsq src)
|
||||
(xmm_unary_rm_r_evex (Avx512Opcode.Vpabsq) src))
|
||||
|
||||
;; Helper for creating `MInst.XmmRmREvex` instructions.
|
||||
(decl xmm_rm_r_evex (Avx512Opcode RegMem Reg) Reg)
|
||||
(rule (xmm_rm_r_evex op src1 src2)
|
||||
|
||||
@@ -946,6 +946,33 @@
|
||||
(rule (lower (has_type (multi_lane _bits _lanes) (band_not x y)))
|
||||
(value_reg (pandn (put_in_reg y) (put_in_reg_mem x))))
|
||||
|
||||
;;;; Rules for `iabs` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
|
||||
(rule (lower (has_type $I8X16 (iabs x)))
|
||||
(value_reg (pabsb (put_in_reg_mem x))))
|
||||
|
||||
(rule (lower (has_type $I16X8 (iabs x)))
|
||||
(value_reg (pabsw (put_in_reg_mem x))))
|
||||
|
||||
(rule (lower (has_type $I32X4 (iabs x)))
|
||||
(value_reg (pabsd (put_in_reg_mem x))))
|
||||
|
||||
;; When AVX512VL and AVX512F are enabled, we can use a single `vpabsq` instruction.
|
||||
(rule (lower (has_type (and (avx512vl_enabled)
|
||||
(avx512f_enabled)
|
||||
$I64X2)
|
||||
(iabs x)))
|
||||
(value_reg (vpabsq (put_in_reg_mem x))))
|
||||
|
||||
;; Otherwise, we use a separate register, `neg`, to contain the results of `0 -
|
||||
;; x` and then use `blendvpd`, with `neg` as the mask, to select the original
|
||||
;; `x` lanes wherever the MSB of `neg` is set to 1 (i.e. where `neg` is negative
|
||||
;; or, conversely, where `x` was originally positive), keeping `neg` elsewhere.
|
||||
(rule (lower (has_type $I64X2 (iabs x)))
|
||||
(let ((rx Reg (put_in_reg x))
|
||||
(neg Reg (psubq (imm $I64X2 0) (RegMem.Reg rx))))
|
||||
(value_reg (blendvpd neg (RegMem.Reg rx) neg))))
|
||||
|
||||
;;;; Rules for `fabs` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
|
||||
;; Special case for `f32x4.abs`.
|
||||
|
||||
@@ -1504,6 +1504,14 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
|
||||
return Ok(());
|
||||
}
|
||||
|
||||
let implemented_in_isle = |ctx: &mut C| {
|
||||
unreachable!(
|
||||
"implemented in ISLE: inst = `{}`, type = `{:?}`",
|
||||
ctx.dfg().display_inst(insn),
|
||||
ty
|
||||
)
|
||||
};
|
||||
|
||||
match op {
|
||||
Opcode::Iconst
|
||||
| Opcode::Bconst
|
||||
@@ -1520,54 +1528,8 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
|
||||
| Opcode::Bor
|
||||
| Opcode::Bxor
|
||||
| Opcode::Imul
|
||||
| Opcode::BandNot => {
|
||||
unreachable!(
|
||||
"implemented in ISLE: inst = `{}`, type = `{:?}`",
|
||||
ctx.dfg().display_inst(insn),
|
||||
ty
|
||||
);
|
||||
}
|
||||
|
||||
Opcode::Iabs => {
|
||||
let src = input_to_reg_mem(ctx, inputs[0]);
|
||||
let dst = get_output_reg(ctx, outputs[0]).only_reg().unwrap();
|
||||
let ty = ty.unwrap();
|
||||
if ty == types::I64X2 {
|
||||
if isa_flags.use_avx512vl_simd() && isa_flags.use_avx512f_simd() {
|
||||
ctx.emit(Inst::xmm_unary_rm_r_evex(Avx512Opcode::Vpabsq, src, dst));
|
||||
} else {
|
||||
// If `VPABSQ` from AVX512 is unavailable, we use a separate register, `tmp`, to
|
||||
// contain the results of `0 - src` and then blend in those results with
|
||||
// `BLENDVPD` if the MSB of `tmp` was set to 1 (i.e. if `tmp` was negative or,
|
||||
// conversely, if `src` was originally positive).
|
||||
|
||||
// Emit all 0s into the `tmp` register.
|
||||
let tmp = ctx.alloc_tmp(ty).only_reg().unwrap();
|
||||
ctx.emit(Inst::xmm_rm_r(SseOpcode::Pxor, RegMem::from(tmp), tmp));
|
||||
// Subtract the lanes from 0 and set up `dst`.
|
||||
ctx.emit(Inst::xmm_rm_r(SseOpcode::Psubq, src.clone(), tmp));
|
||||
ctx.emit(Inst::gen_move(dst, tmp.to_reg(), ty));
|
||||
// Choose the subtracted lanes when `tmp` has an MSB of 1. BLENDVPD's semantics
|
||||
// require the "choice" mask to be in XMM0.
|
||||
ctx.emit(Inst::gen_move(
|
||||
Writable::from_reg(regs::xmm0()),
|
||||
tmp.to_reg(),
|
||||
ty,
|
||||
));
|
||||
ctx.emit(Inst::xmm_rm_r(SseOpcode::Blendvpd, src, dst));
|
||||
}
|
||||
} else if ty.is_vector() {
|
||||
let opcode = match ty {
|
||||
types::I8X16 => SseOpcode::Pabsb,
|
||||
types::I16X8 => SseOpcode::Pabsw,
|
||||
types::I32X4 => SseOpcode::Pabsd,
|
||||
_ => panic!("Unsupported type for packed iabs instruction: {}", ty),
|
||||
};
|
||||
ctx.emit(Inst::xmm_unary_rm_r(opcode, src, dst));
|
||||
} else {
|
||||
unimplemented!("iabs is unimplemented for non-vector type: {}", ty);
|
||||
}
|
||||
}
|
||||
| Opcode::BandNot
|
||||
| Opcode::Iabs => implemented_in_isle(ctx),
|
||||
|
||||
Opcode::Imax | Opcode::Umax | Opcode::Imin | Opcode::Umin => {
|
||||
let lhs = put_input_in_reg(ctx, inputs[0]);
|
||||
|
||||
@@ -8,6 +8,7 @@ use super::{
|
||||
is_mergeable_load, lower_to_amode, AluRmiROpcode, Inst as MInst, OperandSize, Reg, RegMemImm,
|
||||
};
|
||||
use crate::isa::x64::inst::args::SyntheticAmode;
|
||||
use crate::isa::x64::inst::regs;
|
||||
use crate::isa::x64::settings as x64_settings;
|
||||
use crate::machinst::isle::*;
|
||||
use crate::{
|
||||
@@ -145,6 +146,15 @@ where
|
||||
}
|
||||
}
|
||||
|
||||
/// Backs the ISLE `avx512f_enabled` extractor: yields `Some(())` only when
/// `self.isa_flags.use_avx512f_simd()` returns true, and `None` otherwise.
/// The `Type` argument is ignored.
#[inline]
|
||||
fn avx512f_enabled(&mut self, _: Type) -> Option<()> {
|
||||
if self.isa_flags.use_avx512f_simd() {
|
||||
Some(())
|
||||
} else {
|
||||
None
|
||||
}
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn imm8_from_value(&mut self, val: Value) -> Option<Imm8Reg> {
|
||||
let inst = self.lower_ctx.dfg().value_def(val).inst()?;
|
||||
@@ -214,6 +224,11 @@ where
|
||||
// shifted into bits 5:6).
|
||||
0b00_00_00_00 | lane << 4
|
||||
}
|
||||
|
||||
/// Backs the ISLE `xmm0` constructor: returns the register from
/// `regs::xmm0()` wrapped as a `WritableReg`.
#[inline]
|
||||
fn xmm0(&mut self) -> WritableReg {
|
||||
WritableReg::from_reg(regs::xmm0())
|
||||
}
|
||||
}
|
||||
|
||||
#[inline]
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
src/clif.isle 9c0563583e5500de00ec5e226edc0547ac3ea789c8d76f1da0401c80ec619320fdc9a6f17fd76bbcac74a5894f85385c1f51c900c2b83bc9906d03d0f29bf5cb
|
||||
src/prelude.isle e4933f2bcb6cd9e00cb6dc0c47c43d096d0c4e37468af17a38fad8906b864d975e0a8b98d15c6a5e2bccf255ec2ced2466991c3405533e9cafefbf4d9ac46823
|
||||
src/isa/x64/inst.isle e4a0657406056a4cf116fe125e91d16377b602e0b41edd6628cbb7259b0fc2aa6b0482ffd33f00d63d68cf3546f188705877309d43eba5e75abd0f38a52a79b2
|
||||
src/isa/x64/lower.isle e51b7a67343dba342a43b3c9e4b9ed7df9b2c66a677018acf7054ba48c27e4e93a4421fd892b9bf7c0e5b790bcfafab7cb3e93ce2b8206c04d456918d2ad0b5a
|
||||
src/isa/x64/inst.isle 12dc8fa43cbba6e9c5cf46a2472e2754abfe33b7fd38f80e271afa3f6c002efad7a4202c8f00ff27d5e6176de8fec97e1887d382cbd4ef06eaac177a0b5992e3
|
||||
src/isa/x64/lower.isle 333e1be62f602bb835a3cebc3299290a3d386438e9190d2db219263d974e097bfc3f1afdaac9401853806d21d548cad70bab2ffbc3b1cf5c3bebdd971a961f70
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
Reference in New Issue
Block a user