ISLE: port iabs to ISLE for x64

This commit is contained in:
Nick Fitzgerald
2021-11-19 11:03:44 -08:00
parent ef8ea644f4
commit 94e0de45ed
6 changed files with 405 additions and 174 deletions

View File

@@ -22,6 +22,9 @@
(XmmUnaryRmR (op SseOpcode) (XmmUnaryRmR (op SseOpcode)
(src RegMem) (src RegMem)
(dst WritableReg)) (dst WritableReg))
(XmmUnaryRmREvex (op Avx512Opcode)
(src RegMem)
(dst WritableReg))
(XmmRmiReg (opcode SseOpcode) (XmmRmiReg (opcode SseOpcode)
(src1 Reg) (src1 Reg)
(src2 RegMemImm) (src2 RegMemImm)
@@ -347,6 +350,15 @@
(decl encode_fcmp_imm (FcmpImm) u8) (decl encode_fcmp_imm (FcmpImm) u8)
(extern constructor encode_fcmp_imm encode_fcmp_imm) (extern constructor encode_fcmp_imm encode_fcmp_imm)
;;;; Helpers for Getting Particular Physical Registers ;;;;;;;;;;;;;;;;;;;;;;;;;
;;
;; These should only be used for legalization purposes, when we can't otherwise
;; rely on something like `Inst::mov_mitosis` to put an operand into the
;; appropriate physical register for whatever reason.
;; Get the physical `xmm0` register as a `WritableReg`. Implemented externally
;; by the `xmm0` constructor on the lowering context.
(decl xmm0 () WritableReg)
(extern constructor xmm0 xmm0)
;;;; Helpers for Querying Enabled ISA Extensions ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;;;; Helpers for Querying Enabled ISA Extensions ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
(decl avx512vl_enabled () Type) (decl avx512vl_enabled () Type)
@@ -355,6 +367,9 @@
(decl avx512dq_enabled () Type) (decl avx512dq_enabled () Type)
(extern extractor avx512dq_enabled avx512dq_enabled) (extern extractor avx512dq_enabled avx512dq_enabled)
;; Extractor that matches only when the AVX512F ISA flag (`use_avx512f_simd`)
;; is enabled; it binds no values.
(decl avx512f_enabled () Type)
(extern extractor avx512f_enabled avx512f_enabled)
;;;; Helpers for Merging and Sinking Immediates/Loads ;;;;;;;;;;;;;;;;;;;;;;;;; ;;;; Helpers for Merging and Sinking Immediates/Loads ;;;;;;;;;;;;;;;;;;;;;;;;;
;; Extract a constant `Imm8Reg.Imm8` from a value operand. ;; Extract a constant `Imm8Reg.Imm8` from a value operand.
@@ -926,6 +941,18 @@
(rule (pandn src1 src2) (rule (pandn src1 src2)
(xmm_rm_r $F64X2 (SseOpcode.Pandn) src1 src2)) (xmm_rm_r $F64X2 (SseOpcode.Pandn) src1 src2))
;; Helper for creating `blendvpd` instructions.
;;
;; The `mask` register is first copied into `xmm0` with a `movapd`; only `src1`
;; and `src2` are passed on to `xmm_rm_r` as explicit operands.
(decl blendvpd (Reg RegMem Reg) Reg)
(rule (blendvpd src1 src2 mask)
;; Move the mask into `xmm0`, as `blendvpd` implicitly operates on that
;; register. (This kind of thing would normally happen inside of
;; `Inst::mov_mitosis`, but has to happen here, where we still have the
;; mask register, because the mask is implicit and doesn't appear in the
;; `Inst` itself.)
(let ((mask2 WritableReg (xmm0))
(_ Unit (emit (MInst.XmmUnaryRmR (SseOpcode.Movapd) (RegMem.Reg mask) mask2))))
(xmm_rm_r $F64X2 (SseOpcode.Blendvpd) src1 src2)))
;; Helper for creating `MInst.XmmRmRImm` instructions. ;; Helper for creating `MInst.XmmRmRImm` instructions.
(decl xmm_rm_r_imm (SseOpcode Reg RegMem u8 OperandSize) Reg) (decl xmm_rm_r_imm (SseOpcode Reg RegMem u8 OperandSize) Reg)
(rule (xmm_rm_r_imm op src1 src2 imm size) (rule (xmm_rm_r_imm op src1 src2 imm size)
@@ -977,6 +1004,33 @@
(rule (pmovzxbw src) (rule (pmovzxbw src)
(xmm_unary_rm_r (SseOpcode.Pmovzxbw) src)) (xmm_unary_rm_r (SseOpcode.Pmovzxbw) src))
;; Helper for creating `pabsb` instructions (used to lower `iabs` on `$I8X16`).
;; Emits a unary SSE instruction reading `src` and returns the result register.
(decl pabsb (RegMem) Reg)
(rule (pabsb src)
(xmm_unary_rm_r (SseOpcode.Pabsb) src))
;; Helper for creating `pabsw` instructions (used to lower `iabs` on `$I16X8`).
;; Emits a unary SSE instruction reading `src` and returns the result register.
(decl pabsw (RegMem) Reg)
(rule (pabsw src)
(xmm_unary_rm_r (SseOpcode.Pabsw) src))
;; Helper for creating `pabsd` instructions (used to lower `iabs` on `$I32X4`).
;; Emits a unary SSE instruction reading `src` and returns the result register.
(decl pabsd (RegMem) Reg)
(rule (pabsd src)
(xmm_unary_rm_r (SseOpcode.Pabsd) src))
;; Helper for creating `MInst.XmmUnaryRmREvex` instructions.
;;
;; Allocates a fresh temporary (typed `$I8X16`) as the destination, emits the
;; instruction with the given AVX-512 opcode and source, and returns the
;; destination as a plain `Reg`.
(decl xmm_unary_rm_r_evex (Avx512Opcode RegMem) Reg)
(rule (xmm_unary_rm_r_evex op src)
(let ((dst WritableReg (temp_writable_reg $I8X16))
(_ Unit (emit (MInst.XmmUnaryRmREvex op src dst))))
(writable_reg_to_reg dst)))
;; Helper for creating `vpabsq` instructions (the AVX-512 lowering of `iabs` on
;; `$I64X2`). Callers are responsible for checking that the required AVX-512
;; extensions are enabled; this helper performs no such check itself.
(decl vpabsq (RegMem) Reg)
(rule (vpabsq src)
(xmm_unary_rm_r_evex (Avx512Opcode.Vpabsq) src))
;; Helper for creating `MInst.XmmRmREvex` instructions. ;; Helper for creating `MInst.XmmRmREvex` instructions.
(decl xmm_rm_r_evex (Avx512Opcode RegMem Reg) Reg) (decl xmm_rm_r_evex (Avx512Opcode RegMem Reg) Reg)
(rule (xmm_rm_r_evex op src1 src2) (rule (xmm_rm_r_evex op src1 src2)

View File

@@ -946,6 +946,33 @@
(rule (lower (has_type (multi_lane _bits _lanes) (band_not x y))) (rule (lower (has_type (multi_lane _bits _lanes) (band_not x y)))
(value_reg (pandn (put_in_reg y) (put_in_reg_mem x)))) (value_reg (pandn (put_in_reg y) (put_in_reg_mem x))))
;;;; Rules for `iabs` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; For 8-, 16- and 32-bit lanes there is a dedicated packed-absolute-value
;; instruction, so each type lowers to a single `pabs*` whose operand may be a
;; register or memory (`put_in_reg_mem`).
(rule (lower (has_type $I8X16 (iabs x)))
(value_reg (pabsb (put_in_reg_mem x))))
(rule (lower (has_type $I16X8 (iabs x)))
(value_reg (pabsw (put_in_reg_mem x))))
(rule (lower (has_type $I32X4 (iabs x)))
(value_reg (pabsd (put_in_reg_mem x))))
;; When AVX512 is available, we can use a single `vpabsq` instruction.
;; Requires both the AVX512VL and AVX512F extensions to be enabled.
;; NOTE(review): this overlaps with the generic `$I64X2` rule for `iabs`;
;; presumably ISLE prefers this more specific rule when both match -- confirm.
(rule (lower (has_type (and (avx512vl_enabled)
(avx512f_enabled)
$I64X2)
(iabs x)))
(value_reg (vpabsq (put_in_reg_mem x))))
;; Otherwise, we compute `neg = 0 - x` in a separate register and pass `neg` to
;; `blendvpd` as both the first operand and the mask, with `x` as the second
;; operand. In lanes where the MSB of `neg` is 1 (i.e. `neg` is negative, so `x`
;; was originally positive) `blendvpd` selects the lane from `x`; in all other
;; lanes the `neg` lane is kept. Either way the selected lane is non-negative
;; (except for the most negative value, which has no positive counterpart).
;; NOTE(review): this assumes `xmm_rm_r` uses its first operand as the
;; instruction's destination, as the `pandn` lowering suggests -- confirm.
(rule (lower (has_type $I64X2 (iabs x)))
(let ((rx Reg (put_in_reg x))
(neg Reg (psubq (imm $I64X2 0) (RegMem.Reg rx))))
(value_reg (blendvpd neg (RegMem.Reg rx) neg))))
;;;; Rules for `fabs` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;;;; Rules for `fabs` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Special case for `f32x4.abs`. ;; Special case for `f32x4.abs`.

View File

@@ -1504,6 +1504,14 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
return Ok(()); return Ok(());
} }
// Shared panic path for opcodes whose lowering lives in ISLE: such
// instructions are not expected to reach this hand-written lowering, so
// getting here is reported via `unreachable!` together with the offending
// instruction and its type.
let implemented_in_isle = |ctx: &mut C| {
unreachable!(
"implemented in ISLE: inst = `{}`, type = `{:?}`",
ctx.dfg().display_inst(insn),
ty
)
};
match op { match op {
Opcode::Iconst Opcode::Iconst
| Opcode::Bconst | Opcode::Bconst
@@ -1520,54 +1528,8 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
| Opcode::Bor | Opcode::Bor
| Opcode::Bxor | Opcode::Bxor
| Opcode::Imul | Opcode::Imul
| Opcode::BandNot => { | Opcode::BandNot
unreachable!( | Opcode::Iabs => implemented_in_isle(ctx),
"implemented in ISLE: inst = `{}`, type = `{:?}`",
ctx.dfg().display_inst(insn),
ty
);
}
Opcode::Iabs => {
let src = input_to_reg_mem(ctx, inputs[0]);
let dst = get_output_reg(ctx, outputs[0]).only_reg().unwrap();
let ty = ty.unwrap();
if ty == types::I64X2 {
if isa_flags.use_avx512vl_simd() && isa_flags.use_avx512f_simd() {
ctx.emit(Inst::xmm_unary_rm_r_evex(Avx512Opcode::Vpabsq, src, dst));
} else {
// If `VPABSQ` from AVX512 is unavailable, we use a separate register, `tmp`, to
// contain the results of `0 - src` and then blend in those results with
// `BLENDVPD` if the MSB of `tmp` was set to 1 (i.e. if `tmp` was negative or,
// conversely, if `src` was originally positive).
// Emit all 0s into the `tmp` register.
let tmp = ctx.alloc_tmp(ty).only_reg().unwrap();
ctx.emit(Inst::xmm_rm_r(SseOpcode::Pxor, RegMem::from(tmp), tmp));
// Subtract the lanes from 0 and set up `dst`.
ctx.emit(Inst::xmm_rm_r(SseOpcode::Psubq, src.clone(), tmp));
ctx.emit(Inst::gen_move(dst, tmp.to_reg(), ty));
// Choose the subtracted lanes when `tmp` has an MSB of 1. BLENDVPD's semantics
// require the "choice" mask to be in XMM0.
ctx.emit(Inst::gen_move(
Writable::from_reg(regs::xmm0()),
tmp.to_reg(),
ty,
));
ctx.emit(Inst::xmm_rm_r(SseOpcode::Blendvpd, src, dst));
}
} else if ty.is_vector() {
let opcode = match ty {
types::I8X16 => SseOpcode::Pabsb,
types::I16X8 => SseOpcode::Pabsw,
types::I32X4 => SseOpcode::Pabsd,
_ => panic!("Unsupported type for packed iabs instruction: {}", ty),
};
ctx.emit(Inst::xmm_unary_rm_r(opcode, src, dst));
} else {
unimplemented!("iabs is unimplemented for non-vector type: {}", ty);
}
}
Opcode::Imax | Opcode::Umax | Opcode::Imin | Opcode::Umin => { Opcode::Imax | Opcode::Umax | Opcode::Imin | Opcode::Umin => {
let lhs = put_input_in_reg(ctx, inputs[0]); let lhs = put_input_in_reg(ctx, inputs[0]);

View File

@@ -8,6 +8,7 @@ use super::{
is_mergeable_load, lower_to_amode, AluRmiROpcode, Inst as MInst, OperandSize, Reg, RegMemImm, is_mergeable_load, lower_to_amode, AluRmiROpcode, Inst as MInst, OperandSize, Reg, RegMemImm,
}; };
use crate::isa::x64::inst::args::SyntheticAmode; use crate::isa::x64::inst::args::SyntheticAmode;
use crate::isa::x64::inst::regs;
use crate::isa::x64::settings as x64_settings; use crate::isa::x64::settings as x64_settings;
use crate::machinst::isle::*; use crate::machinst::isle::*;
use crate::{ use crate::{
@@ -145,6 +146,15 @@ where
} }
} }
/// Extractor backing the ISLE `avx512f_enabled` pattern.
///
/// Yields `Some(())` when the `use_avx512f_simd` ISA flag is set and `None`
/// otherwise; the `Type` argument is ignored.
#[inline]
fn avx512f_enabled(&mut self, _: Type) -> Option<()> {
    if !self.isa_flags.use_avx512f_simd() {
        return None;
    }
    Some(())
}
#[inline] #[inline]
fn imm8_from_value(&mut self, val: Value) -> Option<Imm8Reg> { fn imm8_from_value(&mut self, val: Value) -> Option<Imm8Reg> {
let inst = self.lower_ctx.dfg().value_def(val).inst()?; let inst = self.lower_ctx.dfg().value_def(val).inst()?;
@@ -214,6 +224,11 @@ where
// shifted into bits 5:6). // shifted into bits 5:6).
0b00_00_00_00 | lane << 4 0b00_00_00_00 | lane << 4
} }
/// Constructor backing the ISLE `xmm0` term: returns the physical `xmm0`
/// register wrapped as a writable register.
#[inline]
fn xmm0(&mut self) -> WritableReg {
    let physical = regs::xmm0();
    WritableReg::from_reg(physical)
}
} }
#[inline] #[inline]

View File

@@ -1,4 +1,4 @@
src/clif.isle 9c0563583e5500de00ec5e226edc0547ac3ea789c8d76f1da0401c80ec619320fdc9a6f17fd76bbcac74a5894f85385c1f51c900c2b83bc9906d03d0f29bf5cb src/clif.isle 9c0563583e5500de00ec5e226edc0547ac3ea789c8d76f1da0401c80ec619320fdc9a6f17fd76bbcac74a5894f85385c1f51c900c2b83bc9906d03d0f29bf5cb
src/prelude.isle e4933f2bcb6cd9e00cb6dc0c47c43d096d0c4e37468af17a38fad8906b864d975e0a8b98d15c6a5e2bccf255ec2ced2466991c3405533e9cafefbf4d9ac46823 src/prelude.isle e4933f2bcb6cd9e00cb6dc0c47c43d096d0c4e37468af17a38fad8906b864d975e0a8b98d15c6a5e2bccf255ec2ced2466991c3405533e9cafefbf4d9ac46823
src/isa/x64/inst.isle e4a0657406056a4cf116fe125e91d16377b602e0b41edd6628cbb7259b0fc2aa6b0482ffd33f00d63d68cf3546f188705877309d43eba5e75abd0f38a52a79b2 src/isa/x64/inst.isle 12dc8fa43cbba6e9c5cf46a2472e2754abfe33b7fd38f80e271afa3f6c002efad7a4202c8f00ff27d5e6176de8fec97e1887d382cbd4ef06eaac177a0b5992e3
src/isa/x64/lower.isle e51b7a67343dba342a43b3c9e4b9ed7df9b2c66a677018acf7054ba48c27e4e93a4421fd892b9bf7c0e5b790bcfafab7cb3e93ce2b8206c04d456918d2ad0b5a src/isa/x64/lower.isle 333e1be62f602bb835a3cebc3299290a3d386438e9190d2db219263d974e097bfc3f1afdaac9401853806d21d548cad70bab2ffbc3b1cf5c3bebdd971a961f70

File diff suppressed because it is too large Load Diff