Add initial scalar FP operations (addss, subss, etc.) to x64 backend.

Adds support for addss and subss. This is the first lowering for SSE floating-point ALU operations and some move operations. The changes here rename some data structures and add a couple of new ones to support SSE-specific operations. The work done here will likely evolve as needed to support an efficient, intuitive, and consistent framework.
2020-05-05 22:05:36 -07:00
parent e5b81bbc28
commit 48f0b10c7a
10 changed files with 503 additions and 66 deletions
--- a/cranelift/codegen/src/isa/x64/lower.rs
+++ b/cranelift/codegen/src/isa/x64/lower.rs
@@ -3,7 +3,7 @@
 #![allow(dead_code)]
 #![allow(non_snake_case)]

-use regalloc::{Reg, Writable};
+use regalloc::{Reg, RegClass, Writable};

 use crate::ir::condcodes::IntCC;
 use crate::ir::types;
@@ -31,7 +31,7 @@ fn is_int_ty(ty: Type) -> bool {
    }
 }

-fn int_ty_to_is64(ty: Type) -> bool {
+fn int_ty_is_64(ty: Type) -> bool {
    match ty {
        types::I8 | types::I16 | types::I32 => false,
        types::I64 => true,
@@ -39,6 +39,14 @@ fn int_ty_to_is64(ty: Type) -> bool {
    }
 }

+fn flt_ty_is_64(ty: Type) -> bool {
+    match ty {
+        types::F32 => false,
+        types::F64 => true,
+        _ => panic!("type {} is none of F32, F64", ty),
+    }
+}
+
 fn int_ty_to_sizeB(ty: Type) -> u8 {
    match ty {
        types::I8 => 1,
@@ -118,7 +126,6 @@ fn lower_insn_to_regs<'a>(ctx: Ctx<'a>, iri: IRInst) {
    };

    // This is all outstandingly feeble.  TODO: much better!
-
    match op {
        Opcode::Iconst => {
            if let Some(w64) = iri_to_u64_immediate(ctx, iri) {
@@ -136,7 +143,7 @@ fn lower_insn_to_regs<'a>(ctx: Ctx<'a>, iri: IRInst) {
            let regD = output_to_reg(ctx, iri, 0);
            let regL = input_to_reg(ctx, iri, 0);
            let regR = input_to_reg(ctx, iri, 1);
-            let is64 = int_ty_to_is64(ty.unwrap());
+            let is64 = int_ty_is_64(ty.unwrap());
            let how = if op == Opcode::Iadd {
                RMI_R_Op::Add
            } else {
@@ -195,7 +202,11 @@ fn lower_insn_to_regs<'a>(ctx: Ctx<'a>, iri: IRInst) {
            for i in 0..ctx.num_inputs(iri) {
                let src_reg = input_to_reg(ctx, iri, i);
                let retval_reg = ctx.retval(i);
-                ctx.emit(Inst::mov_r_r(true, src_reg, retval_reg));
+                if src_reg.get_class() == RegClass::I64 {
+                    ctx.emit(Inst::mov_r_r(true, src_reg, retval_reg));
+                } else if src_reg.get_class() == RegClass::V128 {
+                    ctx.emit(Inst::xmm_r_r(SSE_Op::SSE2_Movsd, src_reg, retval_reg));
+                }
            }
            // N.B.: the Ret itself is generated by the ABI.
        }
@@ -229,7 +240,23 @@ fn lower_insn_to_regs<'a>(ctx: Ctx<'a>, iri: IRInst) {
        | Opcode::SshrImm => {
            panic!("ALU+imm and ALU+carry ops should not appear here!");
        }
-
+        Opcode::Fadd | Opcode::Fsub => {
+            let regD = output_to_reg(ctx, iri, 0);
+            let regL = input_to_reg(ctx, iri, 0);
+            let regR = input_to_reg(ctx, iri, 1);
+            let is64 = flt_ty_is_64(ty.unwrap());
+            if !is64 {
+                let inst = if op == Opcode::Fadd {
+                    SSE_Op::SSE_Addss
+                } else {
+                    SSE_Op::SSE_Subss
+                };
+                ctx.emit(Inst::xmm_r_r(SSE_Op::SSE_Movss, regL, regD));
+                ctx.emit(Inst::xmm_rm_r(inst, RM::reg(regR), regD));
+            } else {
+                unimplemented!("unimplemented lowering for opcode {:?}", op);
+            }
+        }
        _ => unimplemented!("unimplemented lowering for opcode {:?}", op),
    }
 }