Intel encodings for floating-point bitwise ops.

The band, bor, bxor, and band_not instructions can all be encoded directly on XMM registers, for both f32 and f64 values.
This commit is contained in:
Jakob Stoklund Olesen
2017-07-20 11:45:06 -07:00
parent 6ba604125d
commit 2b41f979cb
3 changed files with 111 additions and 5 deletions

View File

@@ -39,6 +39,29 @@ ebb0:
; asm: divss %xmm5, %xmm2
[-,%xmm2] v27 = fdiv v11, v10 ; bin: f3 0f 5e d5
; Bitwise ops.
; We use the *ps SSE instructions for both f32 and f64 because their
; encodings are one byte shorter than the *pd forms (no 0x66 prefix).
; asm: andps %xmm2, %xmm5
[-,%xmm5] v30 = band v10, v11 ; bin: 0f 54 ea
; asm: andps %xmm5, %xmm2
[-,%xmm2] v31 = band v11, v10 ; bin: 0f 54 d5
; asm: andnps %xmm2, %xmm5
[-,%xmm5] v32 = band_not v10, v11 ; bin: 0f 55 ea
; asm: andnps %xmm5, %xmm2
[-,%xmm2] v33 = band_not v11, v10 ; bin: 0f 55 d5
; asm: orps %xmm2, %xmm5
[-,%xmm5] v34 = bor v10, v11 ; bin: 0f 56 ea
; asm: orps %xmm5, %xmm2
[-,%xmm2] v35 = bor v11, v10 ; bin: 0f 56 d5
; asm: xorps %xmm2, %xmm5
[-,%xmm5] v36 = bxor v10, v11 ; bin: 0f 57 ea
; asm: xorps %xmm5, %xmm2
[-,%xmm2] v37 = bxor v11, v10 ; bin: 0f 57 d5
return
}
@@ -74,5 +97,28 @@ ebb0:
; asm: divsd %xmm5, %xmm2
[-,%xmm2] v27 = fdiv v11, v10 ; bin: f2 0f 5e d5
; Bitwise ops.
; We use the *ps SSE instructions for both f32 and f64 because their
; encodings are one byte shorter than the *pd forms (no 0x66 prefix).
; asm: andps %xmm2, %xmm5
[-,%xmm5] v30 = band v10, v11 ; bin: 0f 54 ea
; asm: andps %xmm5, %xmm2
[-,%xmm2] v31 = band v11, v10 ; bin: 0f 54 d5
; asm: andnps %xmm2, %xmm5
[-,%xmm5] v32 = band_not v10, v11 ; bin: 0f 55 ea
; asm: andnps %xmm5, %xmm2
[-,%xmm2] v33 = band_not v11, v10 ; bin: 0f 55 d5
; asm: orps %xmm2, %xmm5
[-,%xmm5] v34 = bor v10, v11 ; bin: 0f 56 ea
; asm: orps %xmm5, %xmm2
[-,%xmm2] v35 = bor v11, v10 ; bin: 0f 56 d5
; asm: xorps %xmm2, %xmm5
[-,%xmm5] v36 = bxor v10, v11 ; bin: 0f 57 ea
; asm: xorps %xmm5, %xmm2
[-,%xmm2] v37 = bxor v11, v10 ; bin: 0f 57 d5
return
}

View File

@@ -47,6 +47,29 @@ ebb0:
; asm: divss %xmm5, %xmm10
[-,%xmm10] v27 = fdiv v11, v10 ; bin: f3 44 0f 5e d5
; Bitwise ops.
; We use the *ps SSE instructions for both f32 and f64 because their
; encodings are one byte shorter than the *pd forms (no 0x66 prefix).
; asm: andps %xmm10, %xmm5
[-,%xmm5] v30 = band v10, v11 ; bin: 41 0f 54 ea
; asm: andps %xmm5, %xmm10
[-,%xmm10] v31 = band v11, v10 ; bin: 44 0f 54 d5
; asm: andnps %xmm10, %xmm5
[-,%xmm5] v32 = band_not v10, v11 ; bin: 41 0f 55 ea
; asm: andnps %xmm5, %xmm10
[-,%xmm10] v33 = band_not v11, v10 ; bin: 44 0f 55 d5
; asm: orps %xmm10, %xmm5
[-,%xmm5] v34 = bor v10, v11 ; bin: 41 0f 56 ea
; asm: orps %xmm5, %xmm10
[-,%xmm10] v35 = bor v11, v10 ; bin: 44 0f 56 d5
; asm: xorps %xmm10, %xmm5
[-,%xmm5] v36 = bxor v10, v11 ; bin: 41 0f 57 ea
; asm: xorps %xmm5, %xmm10
[-,%xmm10] v37 = bxor v11, v10 ; bin: 44 0f 57 d5
return
}
@@ -72,22 +95,45 @@ ebb0:
; asm: addsd %xmm10, %xmm5
[-,%xmm5] v20 = fadd v10, v11 ; bin: f2 41 0f 58 ea
; asm: addsd %xmm5, %xmm10
[-,%xmm10] v21 = fadd v11, v10 ; bin: f2 44 0f 58 d5
[-,%xmm10] v21 = fadd v11, v10 ; bin: f2 44 0f 58 d5
; asm: subsd %xmm10, %xmm5
[-,%xmm5] v22 = fsub v10, v11 ; bin: f2 41 0f 5c ea
; asm: subsd %xmm5, %xmm10
[-,%xmm10] v23 = fsub v11, v10 ; bin: f2 44 0f 5c d5
[-,%xmm10] v23 = fsub v11, v10 ; bin: f2 44 0f 5c d5
; asm: mulsd %xmm10, %xmm5
[-,%xmm5] v24 = fmul v10, v11 ; bin: f2 41 0f 59 ea
; asm: mulsd %xmm5, %xmm10
[-,%xmm10] v25 = fmul v11, v10 ; bin: f2 44 0f 59 d5
[-,%xmm10] v25 = fmul v11, v10 ; bin: f2 44 0f 59 d5
; asm: divsd %xmm10, %xmm5
[-,%xmm5] v26 = fdiv v10, v11 ; bin: f2 41 0f 5e ea
; asm: divsd %xmm5, %xmm10
[-,%xmm10] v27 = fdiv v11, v10 ; bin: f2 44 0f 5e d5
[-,%xmm10] v27 = fdiv v11, v10 ; bin: f2 44 0f 5e d5
; Bitwise ops.
; We use the *ps SSE instructions for both f32 and f64 because their
; encodings are one byte shorter than the *pd forms (no 0x66 prefix).
; asm: andps %xmm10, %xmm5
[-,%xmm5] v30 = band v10, v11 ; bin: 41 0f 54 ea
; asm: andps %xmm5, %xmm10
[-,%xmm10] v31 = band v11, v10 ; bin: 44 0f 54 d5
; asm: andnps %xmm10, %xmm5
[-,%xmm5] v32 = band_not v10, v11 ; bin: 41 0f 55 ea
; asm: andnps %xmm5, %xmm10
[-,%xmm10] v33 = band_not v11, v10 ; bin: 44 0f 55 d5
; asm: orps %xmm10, %xmm5
[-,%xmm5] v34 = bor v10, v11 ; bin: 41 0f 56 ea
; asm: orps %xmm5, %xmm10
[-,%xmm10] v35 = bor v11, v10 ; bin: 44 0f 56 d5
; asm: xorps %xmm10, %xmm5
[-,%xmm5] v36 = bxor v10, v11 ; bin: 41 0f 57 ea
; asm: xorps %xmm5, %xmm10
[-,%xmm10] v37 = bxor v11, v10 ; bin: 44 0f 57 d5
return
}

View File

@@ -224,7 +224,7 @@ I64.enc(base.fcvt_from_sint.f64.i32, *r.furm.rex(0xf2, 0x0f, 0x2A))
I64.enc(base.fcvt_from_sint.f64.i32, *r.furm(0xf2, 0x0f, 0x2A))
# Binary arithmetic ops.
for inst, opc in [
for inst, opc in [
(base.fadd, 0x58),
(base.fsub, 0x5c),
(base.fmul, 0x59),
@@ -236,3 +236,17 @@ for inst, opc in [
I32.enc(inst.f64, *r.frm(0xf2, 0x0f, opc))
I64.enc(inst.f64, *r.frm.rex(0xf2, 0x0f, opc))
I64.enc(inst.f64, *r.frm(0xf2, 0x0f, opc))
# Binary bitwise ops.
#
# Each instruction is registered for both its f32 and f64 forms using the
# same two opcode bytes (0x0f followed by the per-instruction opcode), so the
# scalar type does not change the emitted encoding.
for inst, opc in [
        (base.band, 0x54),      # andps
        (base.band_not, 0x55),  # andnps
        (base.bor, 0x56),       # orps
        (base.bxor, 0x57)]:     # xorps
    for ty_name in ('f32', 'f64'):
        # I32 gets the plain recipe; I64 gets the REX recipe first and then
        # the plain one. The registration order is kept exactly as before.
        I32.enc(getattr(inst, ty_name), *r.frm(0x0f, opc))
        I64.enc(getattr(inst, ty_name), *r.frm.rex(0x0f, opc))
        I64.enc(getattr(inst, ty_name), *r.frm(0x0f, opc))