machinst x64: add basic packed FP arithmetic

Includes instruction definition of packed min/max.
2020-07-24 12:39:04 -07:00
parent 77cc2f69c1
commit e3bd8d696b
3 changed files with 110 additions and 54 deletions
--- a/cranelift/codegen/src/isa/x64/lower.rs
+++ b/cranelift/codegen/src/isa/x64/lower.rs
@@ -51,14 +51,6 @@ fn is_bool_ty(ty: Type) -> bool {
    }
 }

-fn is_float_ty(ty: Type) -> bool {
-    match ty {
-        types::F32 | types::F64 => true,
-        types::R32 => panic!("shouldn't have 32-bits refs on x64"),
-        _ => false,
-    }
-}
-
 fn int_ty_is_64(ty: Type) -> bool {
    match ty {
        types::I8 | types::I16 | types::I32 => false,
@@ -67,14 +59,6 @@ fn int_ty_is_64(ty: Type) -> bool {
    }
 }

-fn flt_ty_is_64(ty: Type) -> bool {
-    match ty {
-        types::F32 => false,
-        types::F64 => true,
-        _ => panic!("type {} is none of F32, F64", ty),
-    }
-}
-
 fn iri_to_u64_imm(ctx: Ctx, inst: IRInst) -> Option<u64> {
    ctx.get_constant(inst)
 }
@@ -1081,32 +1065,54 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
        }

        Opcode::Fadd | Opcode::Fsub | Opcode::Fmul | Opcode::Fdiv => {
-            let lhs = input_to_reg_mem(ctx, inputs[0]);
-            let rhs = input_to_reg(ctx, inputs[1]);
+            let lhs = input_to_reg(ctx, inputs[0]);
+            let rhs = input_to_reg_mem(ctx, inputs[1]);
            let dst = output_to_reg(ctx, outputs[0]);
+            let ty = ty.unwrap();
+
+            // Move the `lhs` to the same register as `dst`; this may not emit an actual move
+            // but ensures that the registers are the same to match x86's read-write operand
+            // encoding.
+            ctx.emit(Inst::gen_move(dst, lhs, ty));

            // Note: min and max can't be handled here, because of the way Cranelift defines them:
            // if any operand is a NaN, they must return the NaN operand, while the x86 machine
-            // instruction will return the other operand.
-            let (f32_op, f64_op) = match op {
-                Opcode::Fadd => (SseOpcode::Addss, SseOpcode::Addsd),
-                Opcode::Fsub => (SseOpcode::Subss, SseOpcode::Subsd),
-                Opcode::Fmul => (SseOpcode::Mulss, SseOpcode::Mulsd),
-                Opcode::Fdiv => (SseOpcode::Divss, SseOpcode::Divsd),
-                _ => unreachable!(),
+            // instruction will return its second (source) operand if either operand is a NaN.
+            let sse_op = match ty {
+                types::F32 => match op {
+                    Opcode::Fadd => SseOpcode::Addss,
+                    Opcode::Fsub => SseOpcode::Subss,
+                    Opcode::Fmul => SseOpcode::Mulss,
+                    Opcode::Fdiv => SseOpcode::Divss,
+                    _ => unreachable!(),
+                },
+                types::F64 => match op {
+                    Opcode::Fadd => SseOpcode::Addsd,
+                    Opcode::Fsub => SseOpcode::Subsd,
+                    Opcode::Fmul => SseOpcode::Mulsd,
+                    Opcode::Fdiv => SseOpcode::Divsd,
+                    _ => unreachable!(),
+                },
+                types::F32X4 => match op {
+                    Opcode::Fadd => SseOpcode::Addps,
+                    Opcode::Fsub => SseOpcode::Subps,
+                    Opcode::Fmul => SseOpcode::Mulps,
+                    Opcode::Fdiv => SseOpcode::Divps,
+                    _ => unreachable!(),
+                },
+                types::F64X2 => match op {
+                    Opcode::Fadd => SseOpcode::Addpd,
+                    Opcode::Fsub => SseOpcode::Subpd,
+                    Opcode::Fmul => SseOpcode::Mulpd,
+                    Opcode::Fdiv => SseOpcode::Divpd,
+                    _ => unreachable!(),
+                },
+                _ => panic!(
+                    "invalid type: expected one of [F32, F64, F32X4, F64X2], found {}",
+                    ty
+                ),
            };
-
-            let is_64 = flt_ty_is_64(ty.unwrap());
-
-            let mov_op = if is_64 {
-                SseOpcode::Movsd
-            } else {
-                SseOpcode::Movss
-            };
-            ctx.emit(Inst::xmm_mov(mov_op, lhs, dst, None));
-
-            let sse_op = if is_64 { f64_op } else { f32_op };
-            ctx.emit(Inst::xmm_rm_r(sse_op, RegMem::reg(rhs), dst));
+            ctx.emit(Inst::xmm_rm_r(sse_op, rhs, dst));
        }

        Opcode::Fmin | Opcode::Fmax => {
@@ -1127,17 +1133,19 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
        Opcode::Sqrt => {
            let src = input_to_reg_mem(ctx, inputs[0]);
            let dst = output_to_reg(ctx, outputs[0]);
+            let ty = ty.unwrap();

-            let (f32_op, f64_op) = match op {
-                Opcode::Sqrt => (SseOpcode::Sqrtss, SseOpcode::Sqrtsd),
-                _ => unreachable!(),
+            let sse_op = match ty {
+                types::F32 => SseOpcode::Sqrtss,
+                types::F64 => SseOpcode::Sqrtsd,
+                types::F32X4 => SseOpcode::Sqrtps,
+                types::F64X2 => SseOpcode::Sqrtpd,
+                _ => panic!(
+                    "invalid type: expected one of [F32, F64, F32X4, F64X2], found {}",
+                    ty
+                ),
            };

-            let sse_op = if flt_ty_is_64(ty.unwrap()) {
-                f64_op
-            } else {
-                f32_op
-            };
            ctx.emit(Inst::xmm_unary_rm_r(sse_op, src, dst));
        }