Add x86 SIMD floating-point arithmetic

This commit is contained in:
Andrew Brown
2019-10-29 09:32:19 -07:00
parent 04db2a9f39
commit c8eb4e9612
4 changed files with 179 additions and 0 deletions

View File

@@ -417,6 +417,8 @@ pub(crate) fn define(
let fill = shared.by_name("fill");
let fill_nop = shared.by_name("fill_nop");
let floor = shared.by_name("floor");
let fmax = shared.by_name("fmax");
let fmin = shared.by_name("fmin");
let fmul = shared.by_name("fmul");
let fpromote = shared.by_name("fpromote");
let fsub = shared.by_name("fsub");
@@ -2081,6 +2083,29 @@ pub(crate) fn define(
rec_pfcmp.opcodes(&CMPPD),
);
// SIMD float arithmetic
for (ty, inst, opcodes) in &[
(F32, fadd, &ADDPS[..]),
(F64, fadd, &ADDPD[..]),
(F32, fsub, &SUBPS[..]),
(F64, fsub, &SUBPD[..]),
(F32, fmul, &MULPS[..]),
(F64, fmul, &MULPD[..]),
(F32, fdiv, &DIVPS[..]),
(F64, fdiv, &DIVPD[..]),
(F32, fmin, &MINPS[..]),
(F64, fmin, &MINPD[..]),
(F32, fmax, &MAXPS[..]),
(F64, fmax, &MAXPD[..]),
] {
let inst_ = inst.bind(vector(*ty, sse_vector_size));
e.enc_both(inst_, rec_fa.opcodes(opcodes));
}
for (ty, inst, opcodes) in &[(F32, sqrt, &SQRTPS[..]), (F64, sqrt, &SQRTPD[..])] {
let inst_ = inst.bind(vector(*ty, sse_vector_size));
e.enc_both(inst_, rec_furm.opcodes(opcodes));
}
// Reference type instructions
// Null references implemented as iconst 0.

View File

@@ -15,6 +15,14 @@ pub static ADD_IMM: [u8; 1] = [0x81];
/// Add sign-extended imm8 to r/m{16,32,64}.
pub static ADD_IMM8_SIGN_EXTEND: [u8; 1] = [0x83];
/// Add packed double-precision floating-point values from xmm2/mem to xmm1 and store result in
/// xmm1 (SSE2).
pub static ADDPD: [u8; 3] = [0x66, 0x0f, 0x58];
/// Add packed single-precision floating-point values from xmm2/mem to xmm1 and store result in
/// xmm1 (SSE).
pub static ADDPS: [u8; 2] = [0x0f, 0x58];
/// Add the low double-precision floating-point value from xmm2/mem to xmm1
/// and store the result in xmm1.
pub static ADDSD: [u8; 3] = [0xf2, 0x0f, 0x58];
@@ -93,6 +101,14 @@ pub static CVTTSS2SI: [u8; 3] = [0xf3, 0x0f, 0x2c];
/// Unsigned divide for {16,32,64}-bit.
pub static DIV: [u8; 1] = [0xf7];
/// Divide packed double-precision floating-point values in xmm1 by packed double-precision
/// floating-point values in xmm2/mem (SSE2).
pub static DIVPD: [u8; 3] = [0x66, 0x0f, 0x5e];
/// Divide packed single-precision floating-point values in xmm1 by packed single-precision
/// floating-point values in xmm2/mem (SSE).
pub static DIVPS: [u8; 2] = [0x0f, 0x5e];
/// Divide low double-precision floating-point value in xmm1 by low double-precision
/// floating-point value in xmm2/m64.
pub static DIVSD: [u8; 3] = [0xf2, 0x0f, 0x5e];
@@ -142,6 +158,14 @@ pub static LEA: [u8; 1] = [0x8d];
/// Count the number of leading zero bits.
pub static LZCNT: [u8; 3] = [0xf3, 0x0f, 0xbd];
/// Return the maximum packed double-precision floating-point values between xmm1 and xmm2/m128
/// (SSE2).
pub static MAXPD: [u8; 3] = [0x66, 0x0f, 0x5f];
/// Return the maximum packed single-precision floating-point values between xmm1 and xmm2/m128
/// (SSE).
pub static MAXPS: [u8; 2] = [0x0f, 0x5f];
/// Return the maximum scalar double-precision floating-point value between
/// xmm2/m64 and xmm1.
pub static MAXSD: [u8; 3] = [0xf2, 0x0f, 0x5f];
@@ -150,6 +174,14 @@ pub static MAXSD: [u8; 3] = [0xf2, 0x0f, 0x5f];
/// xmm2/m32 and xmm1.
pub static MAXSS: [u8; 3] = [0xf3, 0x0f, 0x5f];
/// Return the minimum packed double-precision floating-point values between xmm1 and xmm2/m128
/// (SSE2).
pub static MINPD: [u8; 3] = [0x66, 0x0f, 0x5d];
/// Return the minimum packed single-precision floating-point values between xmm1 and xmm2/m128
/// (SSE).
pub static MINPS: [u8; 2] = [0x0f, 0x5d];
/// Return the minimum scalar double-precision floating-point value between
/// xmm2/m64 and xmm1.
pub static MINSD: [u8; 3] = [0xf2, 0x0f, 0x5d];
@@ -224,6 +256,14 @@ pub static MOVZX_WORD: [u8; 2] = [0x0f, 0xb7];
/// Unsigned multiply for {16,32,64}-bit.
pub static MUL: [u8; 1] = [0xf7];
/// Multiply packed double-precision floating-point values in xmm1 by those in xmm2/mem and store
/// result in xmm1 (SSE2).
pub static MULPD: [u8; 3] = [0x66, 0x0f, 0x59];
/// Multiply packed single-precision floating-point values in xmm1 by those in xmm2/mem and store
/// result in xmm1 (SSE).
pub static MULPS: [u8; 2] = [0x0f, 0x59];
/// Multiply the low double-precision floating-point value in xmm2/m64 by the
/// low double-precision floating-point value in xmm1.
pub static MULSD: [u8; 3] = [0xf2, 0x0f, 0x59];
@@ -474,6 +514,14 @@ pub static SBB: [u8; 1] = [0x19];
/// Set byte if overflow (OF=1).
pub static SET_BYTE_IF_OVERFLOW: [u8; 2] = [0x0f, 0x90];
/// Compute the square root of the packed double-precision floating-point values and store the
/// result in xmm1 (SSE2).
pub static SQRTPD: [u8; 3] = [0x66, 0x0f, 0x51];
/// Compute the square root of the packed single-precision floating-point values and store the
/// result in xmm1 (SSE).
pub static SQRTPS: [u8; 2] = [0x0f, 0x51];
/// Compute square root of scalar double-precision floating-point value.
pub static SQRTSD: [u8; 3] = [0xf2, 0x0f, 0x51];
@@ -483,6 +531,14 @@ pub static SQRTSS: [u8; 3] = [0xf3, 0x0f, 0x51];
/// Subtract r{16,32,64} from r/m of same size.
pub static SUB: [u8; 1] = [0x29];
/// Subtract packed double-precision floating-point values in xmm2/mem from xmm1 and store result
/// in xmm1 (SSE2).
pub static SUBPD: [u8; 3] = [0x66, 0x0f, 0x5c];
/// Subtract packed single-precision floating-point values in xmm2/mem from xmm1 and store result
/// in xmm1 (SSE).
pub static SUBPS: [u8; 2] = [0x0f, 0x5c];
/// Subtract the low double-precision floating-point value in xmm2/m64 from xmm1
/// and store the result in xmm1.
pub static SUBSD: [u8; 3] = [0xf2, 0x0f, 0x5c];