x64 backend: implement 128-bit ops and misc fixes.

This implements all of the ops on I128 that are implemented by the
legacy x86 backend, including all that are required by at least one
major use case (the cg_clif rustc backend).

The sequences are open-coded where necessary; for the bit operations in
particular this can be somewhat complex, but these sequences have been
tested carefully. This PR also includes a drive-by fix of clz/ctz for
the 8- and 16-bit cases, which were previously incorrect.
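
As an illustration of the narrow-width subtlety (not necessarily the
exact sequence the backend emits): a 32-bit count instruction answers
relative to 32 bits, so an 8-bit clz needs a constant correction, and an
8-bit ctz needs the input bounded so that a zero input yields the type
width rather than 32.

    // Illustrative only: correct clz/ctz for a narrow type computed via
    // 32-bit primitives (Rust's leading_zeros/trailing_zeros stand in
    // for bsr/lzcnt/tzcnt-based sequences here).
    fn clz8(x: u8) -> u32 {
        // The 32-bit count sees 24 extra leading zero bits; subtract them.
        (x as u32).leading_zeros() - 24
    }

    fn ctz8(x: u8) -> u32 {
        // OR in bit 8 so a zero input yields 8 (the type width), not 32.
        (x as u32 | 0x100).trailing_zeros()
    }

    fn main() {
        assert_eq!(clz8(0), 8);
        assert_eq!(clz8(1), 7);
        assert_eq!(ctz8(0), 8);
        assert_eq!(ctz8(0b100), 2);
    }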

Also includes ride-along fixes developed while bringing up cg_clif
support; they are difficult to separate cleanly from the other
refactors that occurred in this PR:

- fix REX prefix logic for some 8-bit instructions.

  When using an 8-bit register in 64-bit mode on x86-64, the REX prefix
  semantics are somewhat subtle: without a REX prefix, register numbers
  4-7 refer to the high byte (bits 8-15) of the first four registers
  (AH, CH, DH, BH), whereas with a REX prefix, the same numbers refer to
  the low byte of the usual registers (SPL, BPL, SIL, DIL). We could
  always emit a REX byte for instructions with 8-bit cases (this is
  harmless even if unneeded), but that would unnecessarily inflate code
  size; instead, the usual approach is to emit it only for these
  registers.

  This logic was present for some instructions but missing for others:
  divide, not, negate, and the shifts (a sketch of the rule follows this
  item).

  Fixes #2508.
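
  For reference, a minimal sketch of the rule being applied (a
  hypothetical helper, not the backend's actual function): given the
  4-bit hardware register number of an 8-bit operand, decide whether a
  REX prefix must be emitted.

    // Hypothetical sketch; `enc` is the 4-bit hardware register number (0..=15).
    fn needs_rex_for_8bit_reg(enc: u8) -> bool {
        // 0..=3  (AL/CL/DL/BL): no REX needed just for being 8-bit.
        // 4..=7  (SPL/BPL/SIL/DIL): an empty REX is required, or the
        //        encoding would be read as AH/CH/DH/BH.
        // 8..=15 (R8B..R15B): a REX prefix is present anyway (REX.B/REX.R).
        enc >= 4
    }

    fn main() {
        assert!(!needs_rex_for_8bit_reg(0)); // AL
        assert!(needs_rex_for_8bit_reg(4));  // SPL, not AH
        assert!(needs_rex_for_8bit_reg(9));  // R9B
    }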

- avoid unaligned SSE loads on some f64 ops.

  The implementations of several FP ops, such as fabs/fneg, used SSE
  instructions. This is not a problem per se, except that load-op merging
  did not take *alignment* into account. Specifically, if an op on an f64
  loaded from memory happened to merge that load, and the instruction into
  which it was merged was an SSE instruction, then that SSE instruction
  imposed stricter (128-bit) alignment requirements than the load.f64 did.

  This PR simply forces any instruction lowering that could use SSE
  instructions to implement a non-SIMD operation to take its inputs in
  registers only, avoiding load-op merging (an illustration of the
  alignment hazard follows this item).

  Fixes #2507.
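
  To make the hazard concrete, here is a standalone illustration using
  Rust SSE2 intrinsics (not code from this PR): a scalar f64 load only
  requires 8-byte alignment, while an aligned 128-bit load requires 16
  bytes, which an arbitrary f64 address may not satisfy.

    // Illustration only (x86-64): scalar vs. 128-bit SSE load alignment.
    use core::arch::x86_64::{_mm_load_sd, _mm_loadu_pd};

    fn main() {
        // Elements of an f64 array are only guaranteed 8-byte alignment.
        let data = [1.0f64, -2.0, 3.0, -4.0];
        let p: *const f64 = &data[1]; // may or may not be 16-byte aligned

        unsafe {
            // Fine: scalar load (movsd); 8-byte alignment suffices.
            let _scalar = _mm_load_sd(p);
            // Fine: explicitly unaligned 128-bit load (movupd).
            let _vector = _mm_loadu_pd(p);
            // An aligned 128-bit load (_mm_load_pd, i.e. movapd) through
            // `p` would fault whenever `p` is not 16-byte aligned; this is
            // the same hazard as merging a load.f64 into an SSE instruction
            // that assumes 16-byte alignment.
        }
    }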

- two bugfixes exposed by cg_clif: urem/srem.i8, select.b1.

  - urem/srem.i8: the 8-bit form of the DIV instruction on x86-64 places
    the remainder in AH, not RDX, unlike all the wider forms of this
    instruction (see the illustration after this list).

  - select.b1: we were not recognizing selects of boolean values as
    integer-typed operations, so we were generating XMM moves instead (!).
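
  As a standalone illustration of the AH quirk (inline asm, not the
  backend's lowering): after an 8-bit DIV, the remainder sits in the high
  byte of AX and has to be extracted from there rather than from DX/RDX.

    // Illustration only (x86-64): 8-bit DIV puts the quotient in AL and
    // the remainder in AH.
    use std::arch::asm;

    fn urem_u8(a: u8, b: u8) -> u8 {
        assert!(b != 0);
        let ax_out: u16;
        unsafe {
            asm!(
                "div {divisor}",   // AL = AX / divisor, AH = AX % divisor
                divisor = in(reg_byte) b,
                inout("ax") a as u16 => ax_out, // dividend zero-extended into AX
            );
        }
        (ax_out >> 8) as u8 // the remainder lives in AH, the high byte of AX
    }

    fn main() {
        assert_eq!(urem_u8(23, 5), 23 % 5);
        assert_eq!(urem_u8(200, 7), 200 % 7);
    }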

Author: Chris Fallin
Date:   2020-12-12 22:21:39 -08:00
Commit: 71ead6e31d (parent: 705af0ac41)
12 changed files with 3213 additions and 675 deletions

@@ -1243,6 +1243,14 @@ impl PrettyPrint for Inst {
            (if is_64 { "q" } else { "l" }).to_string()
        }

        fn suffix_lqb(is_64: bool, is_8: bool) -> String {
            match (is_64, is_8) {
                (_, true) => "b".to_string(),
                (true, false) => "q".to_string(),
                (false, false) => "l".to_string(),
            }
        }

        fn size_lq(is_64: bool) -> u8 {
            if is_64 {
                8
@@ -1251,6 +1259,16 @@ impl PrettyPrint for Inst {
            }
        }

        fn size_lqb(is_64: bool, is_8: bool) -> u8 {
            if is_8 {
                1
            } else if is_64 {
                8
            } else {
                4
            }
        }

        fn suffix_bwlq(size: u8) -> String {
            match size {
                1 => "b".to_string(),
@@ -1271,9 +1289,9 @@ impl PrettyPrint for Inst {
                dst,
            } => format!(
                "{} {}, {}",
                ljustify2(op.to_string(), suffix_lq(*is_64)),
                src.show_rru_sized(mb_rru, size_lq(*is_64)),
                show_ireg_sized(dst.to_reg(), mb_rru, size_lq(*is_64)),
                ljustify2(op.to_string(), suffix_lqb(*is_64, op.is_8bit())),
                src.show_rru_sized(mb_rru, size_lqb(*is_64, op.is_8bit())),
                show_ireg_sized(dst.to_reg(), mb_rru, size_lqb(*is_64, op.is_8bit())),
            ),
            Inst::UnaryRmR { src, dst, op, size } => format!(
@@ -2065,6 +2083,17 @@ impl Amode {
            }
        }
    }

    /// Offset the amode by a fixed offset.
    pub(crate) fn offset(&self, offset: u32) -> Self {
        let mut ret = self.clone();
        match &mut ret {
            &mut Amode::ImmReg { ref mut simm32, .. } => *simm32 += offset,
            &mut Amode::ImmRegRegShift { ref mut simm32, .. } => *simm32 += offset,
            _ => panic!("Cannot offset amode: {:?}", self),
        }
        ret
    }
}

impl RegMemImm {
@@ -2548,77 +2577,88 @@ impl MachInst for Inst {
        ty: Type,
        mut alloc_tmp: F,
    ) -> SmallVec<[Self; 4]> {
        // We don't support 128-bit constants.
        assert!(value <= u64::MAX as u128);
        let mut ret = SmallVec::new();
        let to_reg = to_regs
            .only_reg()
            .expect("multi-reg values not supported on x64");
        if ty == types::F32 {
            if value == 0 {
                ret.push(Inst::xmm_rm_r(
                    SseOpcode::Xorps,
                    RegMem::reg(to_reg.to_reg()),
                    to_reg,
                ));
            } else {
                let tmp = alloc_tmp(types::I32);
                ret.push(Inst::imm(OperandSize::Size32, value as u64, tmp));
                ret.push(Inst::gpr_to_xmm(
                    SseOpcode::Movd,
                    RegMem::reg(tmp.to_reg()),
                    OperandSize::Size32,
                    to_reg,
                ));
            }
        } else if ty == types::F64 {
            if value == 0 {
                ret.push(Inst::xmm_rm_r(
                    SseOpcode::Xorpd,
                    RegMem::reg(to_reg.to_reg()),
                    to_reg,
                ));
            } else {
                let tmp = alloc_tmp(types::I64);
                ret.push(Inst::imm(OperandSize::Size64, value as u64, tmp));
                ret.push(Inst::gpr_to_xmm(
                    SseOpcode::Movq,
                    RegMem::reg(tmp.to_reg()),
                    OperandSize::Size64,
                    to_reg,
                ));
            }
        if ty == types::I128 {
            ret.push(Inst::imm(
                OperandSize::Size64,
                value as u64,
                to_regs.regs()[0],
            ));
            ret.push(Inst::imm(
                OperandSize::Size64,
                (value >> 64) as u64,
                to_regs.regs()[1],
            ));
        } else {
            // Must be an integer type.
            debug_assert!(
                ty == types::B1
                    || ty == types::I8
                    || ty == types::B8
                    || ty == types::I16
                    || ty == types::B16
                    || ty == types::I32
                    || ty == types::B32
                    || ty == types::I64
                    || ty == types::B64
                    || ty == types::R32
                    || ty == types::R64
            );
            if value == 0 {
                ret.push(Inst::alu_rmi_r(
                    ty == types::I64,
                    AluRmiROpcode::Xor,
                    RegMemImm::reg(to_reg.to_reg()),
                    to_reg,
                ));
            let to_reg = to_regs
                .only_reg()
                .expect("multi-reg values not supported on x64");
            if ty == types::F32 {
                if value == 0 {
                    ret.push(Inst::xmm_rm_r(
                        SseOpcode::Xorps,
                        RegMem::reg(to_reg.to_reg()),
                        to_reg,
                    ));
                } else {
                    let tmp = alloc_tmp(types::I32);
                    ret.push(Inst::imm(OperandSize::Size32, value as u64, tmp));
                    ret.push(Inst::gpr_to_xmm(
                        SseOpcode::Movd,
                        RegMem::reg(tmp.to_reg()),
                        OperandSize::Size32,
                        to_reg,
                    ));
                }
            } else if ty == types::F64 {
                if value == 0 {
                    ret.push(Inst::xmm_rm_r(
                        SseOpcode::Xorpd,
                        RegMem::reg(to_reg.to_reg()),
                        to_reg,
                    ));
                } else {
                    let tmp = alloc_tmp(types::I64);
                    ret.push(Inst::imm(OperandSize::Size64, value as u64, tmp));
                    ret.push(Inst::gpr_to_xmm(
                        SseOpcode::Movq,
                        RegMem::reg(tmp.to_reg()),
                        OperandSize::Size64,
                        to_reg,
                    ));
                }
            } else {
                let value = value as u64;
                ret.push(Inst::imm(
                    OperandSize::from_bytes(ty.bytes()),
                    value.into(),
                    to_reg,
                ));
                // Must be an integer type.
                debug_assert!(
                    ty == types::B1
                        || ty == types::I8
                        || ty == types::B8
                        || ty == types::I16
                        || ty == types::B16
                        || ty == types::I32
                        || ty == types::B32
                        || ty == types::I64
                        || ty == types::B64
                        || ty == types::R32
                        || ty == types::R64
                );
                if value == 0 {
                    ret.push(Inst::alu_rmi_r(
                        ty == types::I64,
                        AluRmiROpcode::Xor,
                        RegMemImm::reg(to_reg.to_reg()),
                        to_reg,
                    ));
                } else {
                    let value = value as u64;
                    ret.push(Inst::imm(
                        OperandSize::from_bytes(ty.bytes()),
                        value.into(),
                        to_reg,
                    ));
                }
            }
        }
        ret