x64: Migrate fcvt_from_sint and fcvt_low_from_sint to ISLE (#4650)
https://github.com/bytecodealliance/wasmtime/pull/4650
This commit is contained in:
@@ -172,6 +172,7 @@ enum ExtSpec {
|
||||
#[allow(dead_code)]
|
||||
ZeroExtendTo32,
|
||||
ZeroExtendTo64,
|
||||
#[allow(dead_code)]
|
||||
SignExtendTo32,
|
||||
#[allow(dead_code)] // not used just yet but may be used in the future!
|
||||
SignExtendTo64,
|
||||
@@ -626,56 +627,12 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
|
||||
| Opcode::GetReturnAddress
|
||||
| Opcode::Select
|
||||
| Opcode::Selectif
|
||||
| Opcode::SelectifSpectreGuard => {
|
||||
| Opcode::SelectifSpectreGuard
|
||||
| Opcode::FcvtFromSint
|
||||
| Opcode::FcvtLowFromSint => {
|
||||
implemented_in_isle(ctx);
|
||||
}
|
||||
|
||||
Opcode::FcvtFromSint => {
|
||||
let output_ty = ty.unwrap();
|
||||
if !output_ty.is_vector() {
|
||||
let (ext_spec, src_size) = match ctx.input_ty(insn, 0) {
|
||||
types::I8 | types::I16 => (Some(ExtSpec::SignExtendTo32), OperandSize::Size32),
|
||||
types::I32 => (None, OperandSize::Size32),
|
||||
types::I64 => (None, OperandSize::Size64),
|
||||
_ => unreachable!(),
|
||||
};
|
||||
|
||||
let src = match ext_spec {
|
||||
Some(ext_spec) => RegMem::reg(extend_input_to_reg(ctx, inputs[0], ext_spec)),
|
||||
None => RegMem::reg(put_input_in_reg(ctx, inputs[0])),
|
||||
};
|
||||
|
||||
let opcode = if output_ty == types::F32 {
|
||||
SseOpcode::Cvtsi2ss
|
||||
} else {
|
||||
assert_eq!(output_ty, types::F64);
|
||||
SseOpcode::Cvtsi2sd
|
||||
};
|
||||
let dst = get_output_reg(ctx, outputs[0]).only_reg().unwrap();
|
||||
ctx.emit(Inst::gpr_to_xmm(opcode, src, src_size, dst));
|
||||
} else {
|
||||
let ty = ty.unwrap();
|
||||
let src = put_input_in_reg(ctx, inputs[0]);
|
||||
let dst = get_output_reg(ctx, outputs[0]).only_reg().unwrap();
|
||||
let opcode = match ctx.input_ty(insn, 0) {
|
||||
types::I32X4 => SseOpcode::Cvtdq2ps,
|
||||
_ => {
|
||||
unimplemented!("unable to use type {} for op {}", ctx.input_ty(insn, 0), op)
|
||||
}
|
||||
};
|
||||
ctx.emit(Inst::gen_move(dst, src, ty));
|
||||
ctx.emit(Inst::xmm_rm_r(opcode, RegMem::from(dst), dst));
|
||||
}
|
||||
}
|
||||
Opcode::FcvtLowFromSint => {
|
||||
let src = RegMem::reg(put_input_in_reg(ctx, inputs[0]));
|
||||
let dst = get_output_reg(ctx, outputs[0]).only_reg().unwrap();
|
||||
ctx.emit(Inst::xmm_unary_rm_r(
|
||||
SseOpcode::Cvtdq2pd,
|
||||
RegMem::from(src),
|
||||
dst,
|
||||
));
|
||||
}
|
||||
Opcode::FcvtFromUint => {
|
||||
let dst = get_output_reg(ctx, outputs[0]).only_reg().unwrap();
|
||||
let ty = ty.unwrap();
|
||||
@@ -844,11 +801,19 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
|
||||
ctx.emit(Inst::xmm_rm_r(SseOpcode::Psubd, RegMem::from(tmp), dst));
|
||||
|
||||
// Convert the low 16 bits
|
||||
ctx.emit(Inst::xmm_rm_r(SseOpcode::Cvtdq2ps, RegMem::from(tmp), tmp));
|
||||
ctx.emit(Inst::xmm_unary_rm_r(
|
||||
SseOpcode::Cvtdq2ps,
|
||||
RegMem::from(tmp),
|
||||
tmp,
|
||||
));
|
||||
|
||||
// Shift the high bits by 1, convert, and double to get the correct value.
|
||||
ctx.emit(Inst::xmm_rmi_reg(SseOpcode::Psrld, RegMemImm::imm(1), dst));
|
||||
ctx.emit(Inst::xmm_rm_r(SseOpcode::Cvtdq2ps, RegMem::from(dst), dst));
|
||||
ctx.emit(Inst::xmm_unary_rm_r(
|
||||
SseOpcode::Cvtdq2ps,
|
||||
RegMem::from(dst),
|
||||
dst,
|
||||
));
|
||||
ctx.emit(Inst::xmm_rm_r(
|
||||
SseOpcode::Addps,
|
||||
RegMem::reg(dst.to_reg()),
|
||||
@@ -938,7 +903,7 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
|
||||
));
|
||||
|
||||
// Convert the packed float to packed doubleword.
|
||||
ctx.emit(Inst::xmm_rm_r(
|
||||
ctx.emit(Inst::xmm_unary_rm_r(
|
||||
SseOpcode::Cvttps2dq,
|
||||
RegMem::reg(dst.to_reg()),
|
||||
dst,
|
||||
@@ -1031,7 +996,7 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
|
||||
|
||||
ctx.emit(Inst::xmm_rm_r(SseOpcode::Pcmpeqd, RegMem::from(tmp2), tmp2));
|
||||
ctx.emit(Inst::xmm_rmi_reg(SseOpcode::Psrld, RegMemImm::imm(1), tmp2));
|
||||
ctx.emit(Inst::xmm_rm_r(
|
||||
ctx.emit(Inst::xmm_unary_rm_r(
|
||||
SseOpcode::Cvtdq2ps,
|
||||
RegMem::from(tmp2),
|
||||
tmp2,
|
||||
@@ -1041,7 +1006,11 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
|
||||
// Overflow lanes greater than the maximum allowed signed value will
|
||||
// set to 0x80000000. Negative and NaN lanes will be 0x0
|
||||
ctx.emit(Inst::xmm_mov(SseOpcode::Movaps, RegMem::from(dst), tmp1));
|
||||
ctx.emit(Inst::xmm_rm_r(SseOpcode::Cvttps2dq, RegMem::from(dst), dst));
|
||||
ctx.emit(Inst::xmm_unary_rm_r(
|
||||
SseOpcode::Cvttps2dq,
|
||||
RegMem::from(dst),
|
||||
dst,
|
||||
));
|
||||
|
||||
// Set lanes to src - max_signed_int
|
||||
ctx.emit(Inst::xmm_rm_r(SseOpcode::Subps, RegMem::from(tmp2), tmp1));
|
||||
@@ -1058,7 +1027,7 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
|
||||
));
|
||||
|
||||
// Convert those set of lanes that have the max_signed_int factored out.
|
||||
ctx.emit(Inst::xmm_rm_r(
|
||||
ctx.emit(Inst::xmm_unary_rm_r(
|
||||
SseOpcode::Cvttps2dq,
|
||||
RegMem::from(tmp1),
|
||||
tmp1,
|
||||
@@ -1416,7 +1385,7 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
|
||||
tmp1,
|
||||
));
|
||||
ctx.emit(Inst::xmm_rm_r(SseOpcode::Minpd, RegMem::from(tmp1), dst));
|
||||
ctx.emit(Inst::xmm_rm_r(
|
||||
ctx.emit(Inst::xmm_unary_rm_r(
|
||||
SseOpcode::Cvttpd2dq,
|
||||
RegMem::from(dst),
|
||||
dst,
|
||||
|
||||
Reference in New Issue
Block a user