Added Intel x86-64 encodings for 64bit loads and store instructions (#127)

* Added Intel x86-64 encodings for 64bit loads and store instructions
* Using GPR registers instead of ABCD for istore8 with REX prefix; fixed testing of 64bit Intel encoding
* Emit REX and REX-less encodings for optional REX prefix; value renumbering in binary64.cton
2017-07-31 14:52:39 -07:00
parent 54534e2147
commit 07e1f682d0
4 changed files with 373 additions and 102 deletions
--- a/lib/cretonne/meta/base/instructions.py
+++ b/lib/cretonne/meta/base/instructions.py
@@ -271,7 +271,7 @@ istore16 = Instruction(
        'istore16', r"""
        Store the low 16 bits of ``x`` to memory at ``p + Offset``.

-        This is equivalent to ``ireduce.i16`` followed by ``store.i8``.
+        This is equivalent to ``ireduce.i16`` followed by ``store.i16``.
        """,
        ins=(Flags, x, p, Offset), can_store=True)

@@ -301,7 +301,7 @@ istore32 = Instruction(
        'istore32', r"""
        Store the low 32 bits of ``x`` to memory at ``p + Offset``.

-        This is equivalent to ``ireduce.i32`` followed by ``store.i8``.
+        This is equivalent to ``ireduce.i32`` followed by ``store.i32``.
        """,
        ins=(Flags, x, p, Offset), can_store=True)

--- a/lib/cretonne/meta/isa/intel/encodings.py
+++ b/lib/cretonne/meta/isa/intel/encodings.py
@@ -55,6 +55,28 @@ def enc_i32_i64(inst, recipe, *args, **kwargs):
    I64.enc(inst.i64, *recipe.rex(*args, w=1, **kwargs))


+def enc_i32_i64_ld_st(inst, w_bit, recipe, *args, **kwargs):
+    # type: (MaybeBoundInst, bool, r.TailRecipe, *int, **int) -> None
+    """
+    Add encodings for `inst.i32` to I32 (no REX prefix).
+    Add encodings for `inst.i32` to I64, first with REX and then without.
+    Add encodings for `inst.i64` to I64: REX with REX.W set when `w_bit` is
+    true; otherwise both a REX encoding and a REX-less one, neither with w=1.
+    """
+    I32.enc(inst.i32.any, *recipe(*args, **kwargs))
+
+    # REX-less encoding must come after REX encoding so we don't use it by
+    # default. Otherwise reg-alloc would never use r8 and up.
+    I64.enc(inst.i32.any, *recipe.rex(*args, **kwargs))
+    I64.enc(inst.i32.any, *recipe(*args, **kwargs))
+
+    if w_bit:
+        I64.enc(inst.i64.any, *recipe.rex(*args, w=1, **kwargs))
+    else:
+        I64.enc(inst.i64.any, *recipe.rex(*args, **kwargs))
+        I64.enc(inst.i64.any, *recipe(*args, **kwargs))
+
+
 def enc_flt(inst, recipe, *args, **kwargs):
    # type: (MaybeBoundInst, r.TailRecipe, *int, **int) -> None
    """
@@ -142,38 +164,60 @@ I64.enc(base.ctz.i64, *r.urm.rex(0xf3, 0x0f, 0xbc, w=1),
 I64.enc(base.ctz.i32, *r.urm.rex(0xf3, 0x0f, 0xbc), isap=cfg.use_bmi1)
 I64.enc(base.ctz.i32, *r.urm(0xf3, 0x0f, 0xbc), isap=cfg.use_bmi1)

+#
 # Loads and stores.
-I32.enc(base.store.i32.any, *r.st(0x89))
-I32.enc(base.store.i32.any, *r.stDisp8(0x89))
-I32.enc(base.store.i32.any, *r.stDisp32(0x89))
+#
+enc_i32_i64_ld_st(base.store, True, r.st, 0x89)
+enc_i32_i64_ld_st(base.store, True, r.stDisp8, 0x89)
+enc_i32_i64_ld_st(base.store, True, r.stDisp32, 0x89)

-I32.enc(base.istore16.i32.any, *r.st(0x66, 0x89))
-I32.enc(base.istore16.i32.any, *r.stDisp8(0x66, 0x89))
-I32.enc(base.istore16.i32.any, *r.stDisp32(0x66, 0x89))
+I64.enc(base.istore32.i64.any, *r.st.rex(0x89))
+I64.enc(base.istore32.i64.any, *r.stDisp8.rex(0x89))
+I64.enc(base.istore32.i64.any, *r.stDisp32.rex(0x89))

+enc_i32_i64_ld_st(base.istore16, False, r.st, 0x66, 0x89)
+enc_i32_i64_ld_st(base.istore16, False, r.stDisp8, 0x66, 0x89)
+enc_i32_i64_ld_st(base.istore16, False, r.stDisp32, 0x66, 0x89)
+
+# Byte stores are more complicated because the registers they can address
+# depend on the presence of a REX prefix.
 I32.enc(base.istore8.i32.any, *r.st_abcd(0x88))
+I64.enc(base.istore8.i32.any, *r.st_abcd(0x88))
+I64.enc(base.istore8.i64.any, *r.st.rex(0x88))
 I32.enc(base.istore8.i32.any, *r.stDisp8_abcd(0x88))
+I64.enc(base.istore8.i32.any, *r.stDisp8_abcd(0x88))
+I64.enc(base.istore8.i64.any, *r.stDisp8.rex(0x88))
 I32.enc(base.istore8.i32.any, *r.stDisp32_abcd(0x88))
+I64.enc(base.istore8.i32.any, *r.stDisp32_abcd(0x88))
+I64.enc(base.istore8.i64.any, *r.stDisp32.rex(0x88))

-I32.enc(base.load.i32.any, *r.ld(0x8b))
-I32.enc(base.load.i32.any, *r.ldDisp8(0x8b))
-I32.enc(base.load.i32.any, *r.ldDisp32(0x8b))
+enc_i32_i64_ld_st(base.load, True, r.ld, 0x8b)
+enc_i32_i64_ld_st(base.load, True, r.ldDisp8, 0x8b)
+enc_i32_i64_ld_st(base.load, True, r.ldDisp32, 0x8b)

-I32.enc(base.uload16.i32.any, *r.ld(0x0f, 0xb7))
-I32.enc(base.uload16.i32.any, *r.ldDisp8(0x0f, 0xb7))
-I32.enc(base.uload16.i32.any, *r.ldDisp32(0x0f, 0xb7))
+I64.enc(base.uload32.i64, *r.ld.rex(0x8b))
+I64.enc(base.uload32.i64, *r.ldDisp8.rex(0x8b))
+I64.enc(base.uload32.i64, *r.ldDisp32.rex(0x8b))

-I32.enc(base.sload16.i32.any, *r.ld(0x0f, 0xbf))
-I32.enc(base.sload16.i32.any, *r.ldDisp8(0x0f, 0xbf))
-I32.enc(base.sload16.i32.any, *r.ldDisp32(0x0f, 0xbf))
+I64.enc(base.sload32.i64, *r.ld.rex(0x63, w=1))
+I64.enc(base.sload32.i64, *r.ldDisp8.rex(0x63, w=1))
+I64.enc(base.sload32.i64, *r.ldDisp32.rex(0x63, w=1))

-I32.enc(base.uload8.i32.any, *r.ld(0x0f, 0xb6))
-I32.enc(base.uload8.i32.any, *r.ldDisp8(0x0f, 0xb6))
-I32.enc(base.uload8.i32.any, *r.ldDisp32(0x0f, 0xb6))
+enc_i32_i64_ld_st(base.uload16, True, r.ld, 0x0f, 0xb7)
+enc_i32_i64_ld_st(base.uload16, True, r.ldDisp8, 0x0f, 0xb7)
+enc_i32_i64_ld_st(base.uload16, True, r.ldDisp32, 0x0f, 0xb7)

-I32.enc(base.sload8.i32.any, *r.ld(0x0f, 0xbe))
-I32.enc(base.sload8.i32.any, *r.ldDisp8(0x0f, 0xbe))
-I32.enc(base.sload8.i32.any, *r.ldDisp32(0x0f, 0xbe))
+enc_i32_i64_ld_st(base.sload16, True, r.ld, 0x0f, 0xbf)
+enc_i32_i64_ld_st(base.sload16, True, r.ldDisp8, 0x0f, 0xbf)
+enc_i32_i64_ld_st(base.sload16, True, r.ldDisp32, 0x0f, 0xbf)
+
+enc_i32_i64_ld_st(base.uload8, True, r.ld, 0x0f, 0xb6)
+enc_i32_i64_ld_st(base.uload8, True, r.ldDisp8, 0x0f, 0xb6)
+enc_i32_i64_ld_st(base.uload8, True, r.ldDisp32, 0x0f, 0xb6)
+
+enc_i32_i64_ld_st(base.sload8, True, r.ld, 0x0f, 0xbe)
+enc_i32_i64_ld_st(base.sload8, True, r.ldDisp8, 0x0f, 0xbe)
+enc_i32_i64_ld_st(base.sload8, True, r.ldDisp32, 0x0f, 0xbe)

 #
 # Call/return
--- a/lib/cretonne/src/isa/intel/binemit.rs
+++ b/lib/cretonne/src/isa/intel/binemit.rs
@@ -114,6 +114,15 @@ fn put_rexmp2<CS: CodeSink + ?Sized>(bits: u16, rex: u8, sink: &mut CS) {
    sink.put1(bits as u8);
 }

+// Emit a single-byte opcode preceded by its mandatory prefix byte and a REX prefix.
+fn put_rexmp1<CS: CodeSink + ?Sized>(bits: u16, rex: u8, sink: &mut CS) {
+    debug_assert_eq!(bits & 0x0c00, 0, "Invalid encoding bits for Mp1*"); // bits 10-11 must be clear
+    let pp = (bits >> 8) & 3; // 2-bit prefix selector taken from bits 8-9
+    sink.put1(PREFIX[(pp - 1) as usize]); // pp indexes PREFIX 1-based; pp == 0 would underflow
+    rex_prefix(bits, rex, sink); // runs between the prefix byte and the opcode byte
+    sink.put1(bits as u8); // low 8 bits of `bits` are written last, as the opcode byte
+}
+
 /// Emit a ModR/M byte for reg-reg operands.
 fn modrm_rr<CS: CodeSink + ?Sized>(rm: RegUnit, reg: RegUnit, sink: &mut CS) {
    let reg = reg as u8 & 7;