diff --git a/cranelift/filetests/isa/intel/binary32-float.cton b/cranelift/filetests/isa/intel/binary32-float.cton index a815ad3a52..4a4361707c 100644 --- a/cranelift/filetests/isa/intel/binary32-float.cton +++ b/cranelift/filetests/isa/intel/binary32-float.cton @@ -39,6 +39,29 @@ ebb0: ; asm: divss %xmm5, %xmm2 [-,%xmm2] v27 = fdiv v11, v10 ; bin: f3 0f 5e d5 + ; Bitwise ops. + ; We use the *ps SSE instructions for everything because they are smaller. + + ; asm: andps %xmm2, %xmm5 + [-,%xmm5] v30 = band v10, v11 ; bin: 0f 54 ea + ; asm: andps %xmm5, %xmm2 + [-,%xmm2] v31 = band v11, v10 ; bin: 0f 54 d5 + + ; asm: andnps %xmm2, %xmm5 + [-,%xmm5] v32 = band_not v10, v11 ; bin: 0f 55 ea + ; asm: andnps %xmm5, %xmm2 + [-,%xmm2] v33 = band_not v11, v10 ; bin: 0f 55 d5 + + ; asm: orps %xmm2, %xmm5 + [-,%xmm5] v34 = bor v10, v11 ; bin: 0f 56 ea + ; asm: orps %xmm5, %xmm2 + [-,%xmm2] v35 = bor v11, v10 ; bin: 0f 56 d5 + + ; asm: xorps %xmm2, %xmm5 + [-,%xmm5] v36 = bxor v10, v11 ; bin: 0f 57 ea + ; asm: xorps %xmm5, %xmm2 + [-,%xmm2] v37 = bxor v11, v10 ; bin: 0f 57 d5 + return } @@ -74,5 +97,28 @@ ebb0: ; asm: divsd %xmm5, %xmm2 [-,%xmm2] v27 = fdiv v11, v10 ; bin: f2 0f 5e d5 + ; Bitwise ops. + ; We use the *ps SSE instructions for everything because they are smaller. 
+ + ; asm: andps %xmm2, %xmm5 + [-,%xmm5] v30 = band v10, v11 ; bin: 0f 54 ea + ; asm: andps %xmm5, %xmm2 + [-,%xmm2] v31 = band v11, v10 ; bin: 0f 54 d5 + + ; asm: andnps %xmm2, %xmm5 + [-,%xmm5] v32 = band_not v10, v11 ; bin: 0f 55 ea + ; asm: andnps %xmm5, %xmm2 + [-,%xmm2] v33 = band_not v11, v10 ; bin: 0f 55 d5 + + ; asm: orps %xmm2, %xmm5 + [-,%xmm5] v34 = bor v10, v11 ; bin: 0f 56 ea + ; asm: orps %xmm5, %xmm2 + [-,%xmm2] v35 = bor v11, v10 ; bin: 0f 56 d5 + + ; asm: xorps %xmm2, %xmm5 + [-,%xmm5] v36 = bxor v10, v11 ; bin: 0f 57 ea + ; asm: xorps %xmm5, %xmm2 + [-,%xmm2] v37 = bxor v11, v10 ; bin: 0f 57 d5 + return } diff --git a/cranelift/filetests/isa/intel/binary64-float.cton b/cranelift/filetests/isa/intel/binary64-float.cton index fcf78c71f1..ba604ad43c 100644 --- a/cranelift/filetests/isa/intel/binary64-float.cton +++ b/cranelift/filetests/isa/intel/binary64-float.cton @@ -47,6 +47,29 @@ ebb0: ; asm: divss %xmm5, %xmm10 [-,%xmm10] v27 = fdiv v11, v10 ; bin: f3 44 0f 5e d5 + ; Bitwise ops. + ; We use the *ps SSE instructions for everything because they are smaller. 
+ + ; asm: andps %xmm10, %xmm5 + [-,%xmm5] v30 = band v10, v11 ; bin: 41 0f 54 ea + ; asm: andps %xmm5, %xmm10 + [-,%xmm10] v31 = band v11, v10 ; bin: 44 0f 54 d5 + + ; asm: andnps %xmm10, %xmm5 + [-,%xmm5] v32 = band_not v10, v11 ; bin: 41 0f 55 ea + ; asm: andnps %xmm5, %xmm10 + [-,%xmm10] v33 = band_not v11, v10 ; bin: 44 0f 55 d5 + + ; asm: orps %xmm10, %xmm5 + [-,%xmm5] v34 = bor v10, v11 ; bin: 41 0f 56 ea + ; asm: orps %xmm5, %xmm10 + [-,%xmm10] v35 = bor v11, v10 ; bin: 44 0f 56 d5 + + ; asm: xorps %xmm10, %xmm5 + [-,%xmm5] v36 = bxor v10, v11 ; bin: 41 0f 57 ea + ; asm: xorps %xmm5, %xmm10 + [-,%xmm10] v37 = bxor v11, v10 ; bin: 44 0f 57 d5 + return } @@ -72,22 +95,45 @@ ebb0: ; asm: addsd %xmm10, %xmm5 [-,%xmm5] v20 = fadd v10, v11 ; bin: f2 41 0f 58 ea ; asm: addsd %xmm5, %xmm10 - [-,%xmm10] v21 = fadd v11, v10 ; bin: f2 44 0f 58 d5 + [-,%xmm10] v21 = fadd v11, v10 ; bin: f2 44 0f 58 d5 ; asm: subsd %xmm10, %xmm5 [-,%xmm5] v22 = fsub v10, v11 ; bin: f2 41 0f 5c ea ; asm: subsd %xmm5, %xmm10 - [-,%xmm10] v23 = fsub v11, v10 ; bin: f2 44 0f 5c d5 + [-,%xmm10] v23 = fsub v11, v10 ; bin: f2 44 0f 5c d5 ; asm: mulsd %xmm10, %xmm5 [-,%xmm5] v24 = fmul v10, v11 ; bin: f2 41 0f 59 ea ; asm: mulsd %xmm5, %xmm10 - [-,%xmm10] v25 = fmul v11, v10 ; bin: f2 44 0f 59 d5 + [-,%xmm10] v25 = fmul v11, v10 ; bin: f2 44 0f 59 d5 ; asm: divsd %xmm10, %xmm5 [-,%xmm5] v26 = fdiv v10, v11 ; bin: f2 41 0f 5e ea ; asm: divsd %xmm5, %xmm10 - [-,%xmm10] v27 = fdiv v11, v10 ; bin: f2 44 0f 5e d5 + [-,%xmm10] v27 = fdiv v11, v10 ; bin: f2 44 0f 5e d5 + + ; Bitwise ops. + ; We use the *ps SSE instructions for everything because they are smaller. 
+ + ; asm: andps %xmm10, %xmm5 + [-,%xmm5] v30 = band v10, v11 ; bin: 41 0f 54 ea + ; asm: andps %xmm5, %xmm10 + [-,%xmm10] v31 = band v11, v10 ; bin: 44 0f 54 d5 + + ; asm: andnps %xmm10, %xmm5 + [-,%xmm5] v32 = band_not v10, v11 ; bin: 41 0f 55 ea + ; asm: andnps %xmm5, %xmm10 + [-,%xmm10] v33 = band_not v11, v10 ; bin: 44 0f 55 d5 + + ; asm: orps %xmm10, %xmm5 + [-,%xmm5] v34 = bor v10, v11 ; bin: 41 0f 56 ea + ; asm: orps %xmm5, %xmm10 + [-,%xmm10] v35 = bor v11, v10 ; bin: 44 0f 56 d5 + + ; asm: xorps %xmm10, %xmm5 + [-,%xmm5] v36 = bxor v10, v11 ; bin: 41 0f 57 ea + ; asm: xorps %xmm5, %xmm10 + [-,%xmm10] v37 = bxor v11, v10 ; bin: 44 0f 57 d5 return } diff --git a/lib/cretonne/meta/isa/intel/encodings.py b/lib/cretonne/meta/isa/intel/encodings.py index 42d54a539f..f155795b6f 100644 --- a/lib/cretonne/meta/isa/intel/encodings.py +++ b/lib/cretonne/meta/isa/intel/encodings.py @@ -224,7 +224,7 @@ I64.enc(base.fcvt_from_sint.f64.i32, *r.furm.rex(0xf2, 0x0f, 0x2A)) I64.enc(base.fcvt_from_sint.f64.i32, *r.furm(0xf2, 0x0f, 0x2A)) # Binary arithmetic ops. -for inst, opc in [ +for inst, opc in [ (base.fadd, 0x58), (base.fsub, 0x5c), (base.fmul, 0x59), @@ -236,3 +236,17 @@ for inst, opc in [ I32.enc(inst.f64, *r.frm(0xf2, 0x0f, opc)) I64.enc(inst.f64, *r.frm.rex(0xf2, 0x0f, opc)) I64.enc(inst.f64, *r.frm(0xf2, 0x0f, opc)) + +# Binary bitwise ops. NOTE(review): andnps is ~dst & src; verify band_not. +for inst, opc in [ + (base.band, 0x54), + (base.band_not, 0x55), + (base.bor, 0x56), + (base.bxor, 0x57)]: + I32.enc(inst.f32, *r.frm(0x0f, opc)) + I64.enc(inst.f32, *r.frm.rex(0x0f, opc)) + I64.enc(inst.f32, *r.frm(0x0f, opc)) + + I32.enc(inst.f64, *r.frm(0x0f, opc)) + I64.enc(inst.f64, *r.frm.rex(0x0f, opc)) + I64.enc(inst.f64, *r.frm(0x0f, opc))