x64: Migrate iadd_pairwise to ISLE (#4718)
* Add a test for iadd_pairwise with swiden input
* Implement iadd_pairwise for swiden_{low,high} input
* Add a test case for iadd_pairwise with uwiden input
* Implement iadd_pairwise with uwiden
This commit is contained in:
@@ -2582,6 +2582,10 @@
|
||||
dst))))
|
||||
dst))
|
||||
|
||||
(decl x64_pmaddubsw (Xmm XmmMem) Xmm)
|
||||
(rule (x64_pmaddubsw src1 src2)
|
||||
(xmm_rm_r $I8X16 (SseOpcode.Pmaddubsw) src1 src2))
|
||||
|
||||
;; Helper for creating `insertps` instructions.
|
||||
(decl x64_insertps (Xmm XmmMem u8) Xmm)
|
||||
(rule (x64_insertps src1 src2 lane)
|
||||
@@ -3255,6 +3259,20 @@
|
||||
(ConsumesFlags.ConsumesFlagsSideEffect
|
||||
(MInst.JmpTableSeq idx tmp1 tmp2 default_target jt_targets)))))
|
||||
|
||||
;;;; iadd_pairwise constants ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
|
||||
(decl iadd_pairwise_mul_const_16 () VCodeConstant)
|
||||
(extern constructor iadd_pairwise_mul_const_16 iadd_pairwise_mul_const_16)
|
||||
|
||||
(decl iadd_pairwise_mul_const_32 () VCodeConstant)
|
||||
(extern constructor iadd_pairwise_mul_const_32 iadd_pairwise_mul_const_32)
|
||||
|
||||
(decl iadd_pairwise_xor_const_32 () VCodeConstant)
|
||||
(extern constructor iadd_pairwise_xor_const_32 iadd_pairwise_xor_const_32)
|
||||
|
||||
(decl iadd_pairwise_addd_const_32 () VCodeConstant)
|
||||
(extern constructor iadd_pairwise_addd_const_32 iadd_pairwise_addd_const_32)
|
||||
|
||||
;;;; Comparisons ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
|
||||
(type IcmpCondResult (enum (Condition (producer ProducesFlags) (cc CC))))
|
||||
|
||||
@@ -3189,3 +3189,40 @@
|
||||
;; Add this second set of converted lanes to the original to properly handle
|
||||
;; values greater than max signed int.
|
||||
(x64_paddd tmp1 dst)))
|
||||
|
||||
;; Rules for `iadd_pairwise` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
|
||||
(rule (lower
|
||||
(has_type $I16X8 (iadd_pairwise
|
||||
(swiden_low val @ (value_type $I8X16))
|
||||
(swiden_high val))))
|
||||
(let ((mul_const Xmm (x64_xmm_load_const $I8X16 (iadd_pairwise_mul_const_16))))
|
||||
(x64_pmaddubsw mul_const val)))
|
||||
|
||||
(rule (lower
|
||||
(has_type $I32X4 (iadd_pairwise
|
||||
(swiden_low val @ (value_type $I16X8))
|
||||
(swiden_high val))))
|
||||
(let ((mul_const Xmm (x64_xmm_load_const $I16X8 (iadd_pairwise_mul_const_32))))
|
||||
(x64_pmaddwd val mul_const)))
|
||||
|
||||
(rule (lower
|
||||
(has_type $I16X8 (iadd_pairwise
|
||||
(uwiden_low val @ (value_type $I8X16))
|
||||
(uwiden_high val))))
|
||||
(let ((mul_const Xmm (x64_xmm_load_const $I8X16 (iadd_pairwise_mul_const_16))))
|
||||
(x64_pmaddubsw val mul_const)))
|
||||
|
||||
(rule (lower
|
||||
(has_type $I32X4 (iadd_pairwise
|
||||
(uwiden_low val @ (value_type $I16X8))
|
||||
(uwiden_high val))))
|
||||
(let ((xor_const Xmm (x64_xmm_load_const $I16X8 (iadd_pairwise_xor_const_32)))
|
||||
(dst Xmm (x64_pxor val xor_const))
|
||||
|
||||
(madd_const Xmm (x64_xmm_load_const $I16X8 (iadd_pairwise_mul_const_32)))
|
||||
(dst Xmm (x64_pmaddwd dst madd_const))
|
||||
|
||||
(addd_const Xmm (x64_xmm_load_const $I16X8 (iadd_pairwise_addd_const_32))))
|
||||
(x64_paddd dst addd_const)))
|
||||
|
||||
|
||||
@@ -561,169 +561,11 @@ fn lower_insn_to_regs(
|
||||
| Opcode::FcvtToUint
|
||||
| Opcode::FcvtToSint
|
||||
| Opcode::FcvtToUintSat
|
||||
| Opcode::FcvtToSintSat => {
|
||||
| Opcode::FcvtToSintSat
|
||||
| Opcode::IaddPairwise => {
|
||||
implemented_in_isle(ctx);
|
||||
}
|
||||
|
||||
Opcode::IaddPairwise => {
|
||||
if let (Some(swiden_low), Some(swiden_high)) = (
|
||||
matches_input(ctx, inputs[0], Opcode::SwidenLow),
|
||||
matches_input(ctx, inputs[1], Opcode::SwidenHigh),
|
||||
) {
|
||||
let swiden_input = &[
|
||||
InsnInput {
|
||||
insn: swiden_low,
|
||||
input: 0,
|
||||
},
|
||||
InsnInput {
|
||||
insn: swiden_high,
|
||||
input: 0,
|
||||
},
|
||||
];
|
||||
|
||||
let input_ty = ctx.input_ty(swiden_low, 0);
|
||||
let output_ty = ctx.output_ty(insn, 0);
|
||||
let src0 = put_input_in_reg(ctx, swiden_input[0]);
|
||||
let src1 = put_input_in_reg(ctx, swiden_input[1]);
|
||||
let dst = get_output_reg(ctx, outputs[0]).only_reg().unwrap();
|
||||
if src0 != src1 {
|
||||
unimplemented!(
|
||||
"iadd_pairwise not implemented for general case with different inputs"
|
||||
);
|
||||
}
|
||||
match (input_ty, output_ty) {
|
||||
(types::I8X16, types::I16X8) => {
|
||||
static MUL_CONST: [u8; 16] = [0x01; 16];
|
||||
let mul_const = ctx.use_constant(VCodeConstantData::WellKnown(&MUL_CONST));
|
||||
let mul_const_reg = ctx.alloc_tmp(types::I8X16).only_reg().unwrap();
|
||||
ctx.emit(Inst::xmm_load_const(mul_const, mul_const_reg, types::I8X16));
|
||||
ctx.emit(Inst::xmm_mov(
|
||||
SseOpcode::Movdqa,
|
||||
RegMem::reg(mul_const_reg.to_reg()),
|
||||
dst,
|
||||
));
|
||||
ctx.emit(Inst::xmm_rm_r(SseOpcode::Pmaddubsw, RegMem::reg(src0), dst));
|
||||
}
|
||||
(types::I16X8, types::I32X4) => {
|
||||
static MUL_CONST: [u8; 16] = [
|
||||
0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00,
|
||||
0x01, 0x00, 0x01, 0x00,
|
||||
];
|
||||
let mul_const = ctx.use_constant(VCodeConstantData::WellKnown(&MUL_CONST));
|
||||
let mul_const_reg = ctx.alloc_tmp(types::I16X8).only_reg().unwrap();
|
||||
ctx.emit(Inst::xmm_load_const(mul_const, mul_const_reg, types::I16X8));
|
||||
ctx.emit(Inst::xmm_mov(SseOpcode::Movdqa, RegMem::reg(src0), dst));
|
||||
ctx.emit(Inst::xmm_rm_r(
|
||||
SseOpcode::Pmaddwd,
|
||||
RegMem::reg(mul_const_reg.to_reg()),
|
||||
dst,
|
||||
));
|
||||
}
|
||||
_ => {
|
||||
unimplemented!("Type not supported for {:?}", op);
|
||||
}
|
||||
}
|
||||
} else if let (Some(uwiden_low), Some(uwiden_high)) = (
|
||||
matches_input(ctx, inputs[0], Opcode::UwidenLow),
|
||||
matches_input(ctx, inputs[1], Opcode::UwidenHigh),
|
||||
) {
|
||||
let uwiden_input = &[
|
||||
InsnInput {
|
||||
insn: uwiden_low,
|
||||
input: 0,
|
||||
},
|
||||
InsnInput {
|
||||
insn: uwiden_high,
|
||||
input: 0,
|
||||
},
|
||||
];
|
||||
|
||||
let input_ty = ctx.input_ty(uwiden_low, 0);
|
||||
let output_ty = ctx.output_ty(insn, 0);
|
||||
let src0 = put_input_in_reg(ctx, uwiden_input[0]);
|
||||
let src1 = put_input_in_reg(ctx, uwiden_input[1]);
|
||||
let dst = get_output_reg(ctx, outputs[0]).only_reg().unwrap();
|
||||
if src0 != src1 {
|
||||
unimplemented!(
|
||||
"iadd_pairwise not implemented for general case with different inputs"
|
||||
);
|
||||
}
|
||||
match (input_ty, output_ty) {
|
||||
(types::I8X16, types::I16X8) => {
|
||||
static MUL_CONST: [u8; 16] = [0x01; 16];
|
||||
let mul_const = ctx.use_constant(VCodeConstantData::WellKnown(&MUL_CONST));
|
||||
let mul_const_reg = ctx.alloc_tmp(types::I8X16).only_reg().unwrap();
|
||||
ctx.emit(Inst::xmm_load_const(mul_const, mul_const_reg, types::I8X16));
|
||||
ctx.emit(Inst::xmm_mov(SseOpcode::Movdqa, RegMem::reg(src0), dst));
|
||||
ctx.emit(Inst::xmm_rm_r(
|
||||
SseOpcode::Pmaddubsw,
|
||||
RegMem::reg(mul_const_reg.to_reg()),
|
||||
dst,
|
||||
));
|
||||
}
|
||||
(types::I16X8, types::I32X4) => {
|
||||
static PXOR_CONST: [u8; 16] = [
|
||||
0x00, 0x80, 0x00, 0x80, 0x00, 0x80, 0x00, 0x80, 0x00, 0x80, 0x00, 0x80,
|
||||
0x00, 0x80, 0x00, 0x80,
|
||||
];
|
||||
let pxor_const =
|
||||
ctx.use_constant(VCodeConstantData::WellKnown(&PXOR_CONST));
|
||||
let pxor_const_reg = ctx.alloc_tmp(types::I16X8).only_reg().unwrap();
|
||||
ctx.emit(Inst::xmm_load_const(
|
||||
pxor_const,
|
||||
pxor_const_reg,
|
||||
types::I16X8,
|
||||
));
|
||||
ctx.emit(Inst::xmm_mov(SseOpcode::Movdqa, RegMem::reg(src0), dst));
|
||||
ctx.emit(Inst::xmm_rm_r(
|
||||
SseOpcode::Pxor,
|
||||
RegMem::reg(pxor_const_reg.to_reg()),
|
||||
dst,
|
||||
));
|
||||
|
||||
static MADD_CONST: [u8; 16] = [
|
||||
0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00,
|
||||
0x01, 0x00, 0x01, 0x00,
|
||||
];
|
||||
let madd_const =
|
||||
ctx.use_constant(VCodeConstantData::WellKnown(&MADD_CONST));
|
||||
let madd_const_reg = ctx.alloc_tmp(types::I8X16).only_reg().unwrap();
|
||||
ctx.emit(Inst::xmm_load_const(
|
||||
madd_const,
|
||||
madd_const_reg,
|
||||
types::I16X8,
|
||||
));
|
||||
ctx.emit(Inst::xmm_rm_r(
|
||||
SseOpcode::Pmaddwd,
|
||||
RegMem::reg(madd_const_reg.to_reg()),
|
||||
dst,
|
||||
));
|
||||
static ADDD_CONST2: [u8; 16] = [
|
||||
0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01, 0x00,
|
||||
0x00, 0x00, 0x01, 0x00,
|
||||
];
|
||||
let addd_const2 =
|
||||
ctx.use_constant(VCodeConstantData::WellKnown(&ADDD_CONST2));
|
||||
let addd_const2_reg = ctx.alloc_tmp(types::I8X16).only_reg().unwrap();
|
||||
ctx.emit(Inst::xmm_load_const(
|
||||
addd_const2,
|
||||
addd_const2_reg,
|
||||
types::I16X8,
|
||||
));
|
||||
ctx.emit(Inst::xmm_rm_r(
|
||||
SseOpcode::Paddd,
|
||||
RegMem::reg(addd_const2_reg.to_reg()),
|
||||
dst,
|
||||
));
|
||||
}
|
||||
_ => {
|
||||
unimplemented!("Type not supported for {:?}", op);
|
||||
}
|
||||
}
|
||||
} else {
|
||||
unimplemented!("Operands not supported for {:?}", op);
|
||||
}
|
||||
}
|
||||
Opcode::UwidenHigh | Opcode::UwidenLow | Opcode::SwidenHigh | Opcode::SwidenLow => {
|
||||
let input_ty = ctx.input_ty(insn, 0);
|
||||
let output_ty = ctx.output_ty(insn, 0);
|
||||
|
||||
@@ -781,6 +781,30 @@ impl Context for IsleContext<'_, '_, MInst, Flags, IsaFlags, 6> {
|
||||
self.lower_ctx
|
||||
.use_constant(VCodeConstantData::WellKnown(&UINT_MASK_HIGH))
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn iadd_pairwise_mul_const_16(&mut self) -> VCodeConstant {
|
||||
self.lower_ctx
|
||||
.use_constant(VCodeConstantData::WellKnown(&IADD_PAIRWISE_MUL_CONST_16))
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn iadd_pairwise_mul_const_32(&mut self) -> VCodeConstant {
|
||||
self.lower_ctx
|
||||
.use_constant(VCodeConstantData::WellKnown(&IADD_PAIRWISE_MUL_CONST_32))
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn iadd_pairwise_xor_const_32(&mut self) -> VCodeConstant {
|
||||
self.lower_ctx
|
||||
.use_constant(VCodeConstantData::WellKnown(&IADD_PAIRWISE_XOR_CONST_32))
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn iadd_pairwise_addd_const_32(&mut self) -> VCodeConstant {
|
||||
self.lower_ctx
|
||||
.use_constant(VCodeConstantData::WellKnown(&IADD_PAIRWISE_ADDD_CONST_32))
|
||||
}
|
||||
}
|
||||
|
||||
impl IsleContext<'_, '_, MInst, Flags, IsaFlags, 6> {
|
||||
@@ -907,3 +931,17 @@ const UINT_MASK: [u8; 16] = [
|
||||
const UINT_MASK_HIGH: [u8; 16] = [
|
||||
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x30, 0x43, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x30, 0x43,
|
||||
];
|
||||
|
||||
const IADD_PAIRWISE_MUL_CONST_16: [u8; 16] = [0x01; 16];
|
||||
|
||||
const IADD_PAIRWISE_MUL_CONST_32: [u8; 16] = [
|
||||
0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x01, 0x00,
|
||||
];
|
||||
|
||||
const IADD_PAIRWISE_XOR_CONST_32: [u8; 16] = [
|
||||
0x00, 0x80, 0x00, 0x80, 0x00, 0x80, 0x00, 0x80, 0x00, 0x80, 0x00, 0x80, 0x00, 0x80, 0x00, 0x80,
|
||||
];
|
||||
|
||||
const IADD_PAIRWISE_ADDD_CONST_32: [u8; 16] = [
|
||||
0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01, 0x00,
|
||||
];
|
||||
|
||||
Reference in New Issue
Block a user