Add encodings for spill.b1, fill.b1 etc.

These spills and fills use 32-bit stores and loads, which is safe because a spill slot is at least 4 bytes. Also simplify the definitions of the load/store encodings a bit by introducing loops.
2017-10-11 10:02:43 -07:00
parent 699cb9895e
commit ece09f2df2
1 changed files with 28 additions and 51 deletions
--- a/lib/cretonne/meta/isa/intel/encodings.py
+++ b/lib/cretonne/meta/isa/intel/encodings.py
@@ -120,6 +120,7 @@ enc_i32_i64(x86.sdivmodx, r.div, 0xf7, rrr=7)
 enc_i32_i64(x86.udivmodx, r.div, 0xf7, rrr=6)

 enc_i32_i64(base.copy, r.umr, 0x89)
+enc_both(base.copy.b1, r.umr, 0x89)
 enc_i32_i64(base.regmove, r.rmov, 0x89)
 enc_both(base.regmove.b1, r.rmov, 0x89)

@@ -188,66 +189,42 @@ I64.enc(base.ctz.i32, *r.urm(0xf3, 0x0f, 0xbc), isap=cfg.use_bmi1)
 #
 # Loads and stores.
 #
-enc_i32_i64_ld_st(base.store, True, r.st, 0x89)
-enc_i32_i64_ld_st(base.store, True, r.stDisp8, 0x89)
-enc_i32_i64_ld_st(base.store, True, r.stDisp32, 0x89)
+for recipe in [r.st, r.stDisp8, r.stDisp32]:
+    enc_i32_i64_ld_st(base.store, True, recipe, 0x89)
+    enc_i64(base.istore32.i64.any, recipe, 0x89)
+    enc_i32_i64_ld_st(base.istore16, False, recipe, 0x66, 0x89)
+
+# Byte stores are more complicated because the registers they can address
+# depend on the presence of a REX prefix. The st*_abcd recipes fall back to
+# the corresponding st* recipes when a REX prefix is applied.
+for recipe in [r.st_abcd, r.stDisp8_abcd, r.stDisp32_abcd]:
+    enc_both(base.istore8.i32.any, recipe, 0x88)
+    enc_i64(base.istore8.i64.any, recipe, 0x88)

 enc_i32_i64(base.spill, r.spSib32, 0x89)
 enc_i32_i64(base.regspill, r.rsp32, 0x89)

-enc_i64(base.istore32.i64.any, r.st, 0x89)
-enc_i64(base.istore32.i64.any, r.stDisp8, 0x89)
-enc_i64(base.istore32.i64.any, r.stDisp32, 0x89)
+# Use a 32-bit write for spilling `b1` to avoid constraining the permitted
+# registers.
+# See MIN_SPILL_SLOT_SIZE which makes this safe.
+enc_both(base.spill.b1, r.spSib32, 0x89)
+enc_both(base.regspill.b1, r.rsp32, 0x89)

-enc_i32_i64_ld_st(base.istore16, False, r.st, 0x66, 0x89)
-enc_i32_i64_ld_st(base.istore16, False, r.stDisp8, 0x66, 0x89)
-enc_i32_i64_ld_st(base.istore16, False, r.stDisp32, 0x66, 0x89)
-
-# Byte stores are more complicated because the registers they can address
-# depends of the presence of a REX prefix
-I32.enc(base.istore8.i32.any, *r.st_abcd(0x88))
-I64.enc(base.istore8.i32.any, *r.st_abcd(0x88))
-I64.enc(base.istore8.i64.any, *r.st_abcd(0x88))
-I64.enc(base.istore8.i64.any, *r.st.rex(0x88))
-I32.enc(base.istore8.i32.any, *r.stDisp8_abcd(0x88))
-I64.enc(base.istore8.i32.any, *r.stDisp8_abcd(0x88))
-I64.enc(base.istore8.i64.any, *r.stDisp8_abcd(0x88))
-I64.enc(base.istore8.i64.any, *r.stDisp8.rex(0x88))
-I32.enc(base.istore8.i32.any, *r.stDisp32_abcd(0x88))
-I64.enc(base.istore8.i32.any, *r.stDisp32_abcd(0x88))
-I64.enc(base.istore8.i64.any, *r.stDisp32_abcd(0x88))
-I64.enc(base.istore8.i64.any, *r.stDisp32.rex(0x88))
-
-enc_i32_i64_ld_st(base.load, True, r.ld, 0x8b)
-enc_i32_i64_ld_st(base.load, True, r.ldDisp8, 0x8b)
-enc_i32_i64_ld_st(base.load, True, r.ldDisp32, 0x8b)
+for recipe in [r.ld, r.ldDisp8, r.ldDisp32]:
+    enc_i32_i64_ld_st(base.load, True, recipe, 0x8b)
+    enc_i64(base.uload32.i64, recipe, 0x8b)
+    I64.enc(base.sload32.i64, *recipe.rex(0x63, w=1))
+    enc_i32_i64_ld_st(base.uload16, True, recipe, 0x0f, 0xb7)
+    enc_i32_i64_ld_st(base.sload16, True, recipe, 0x0f, 0xbf)
+    enc_i32_i64_ld_st(base.uload8, True, recipe, 0x0f, 0xb6)
+    enc_i32_i64_ld_st(base.sload8, True, recipe, 0x0f, 0xbe)

 enc_i32_i64(base.fill, r.fiSib32, 0x8b)
 enc_i32_i64(base.regfill, r.rfi32, 0x8b)

-enc_i64(base.uload32.i64, r.ld, 0x8b)
-enc_i64(base.uload32.i64, r.ldDisp8, 0x8b)
-enc_i64(base.uload32.i64, r.ldDisp32, 0x8b)
-
-I64.enc(base.sload32.i64, *r.ld.rex(0x63, w=1))
-I64.enc(base.sload32.i64, *r.ldDisp8.rex(0x63, w=1))
-I64.enc(base.sload32.i64, *r.ldDisp32.rex(0x63, w=1))
-
-enc_i32_i64_ld_st(base.uload16, True, r.ld, 0x0f, 0xb7)
-enc_i32_i64_ld_st(base.uload16, True, r.ldDisp8, 0x0f, 0xb7)
-enc_i32_i64_ld_st(base.uload16, True, r.ldDisp32, 0x0f, 0xb7)
-
-enc_i32_i64_ld_st(base.sload16, True, r.ld, 0x0f, 0xbf)
-enc_i32_i64_ld_st(base.sload16, True, r.ldDisp8, 0x0f, 0xbf)
-enc_i32_i64_ld_st(base.sload16, True, r.ldDisp32, 0x0f, 0xbf)
-
-enc_i32_i64_ld_st(base.uload8, True, r.ld, 0x0f, 0xb6)
-enc_i32_i64_ld_st(base.uload8, True, r.ldDisp8, 0x0f, 0xb6)
-enc_i32_i64_ld_st(base.uload8, True, r.ldDisp32, 0x0f, 0xb6)
-
-enc_i32_i64_ld_st(base.sload8, True, r.ld, 0x0f, 0xbe)
-enc_i32_i64_ld_st(base.sload8, True, r.ldDisp8, 0x0f, 0xbe)
-enc_i32_i64_ld_st(base.sload8, True, r.ldDisp32, 0x0f, 0xbe)
+# Load 32 bits from `b1` spill slots. See `spill.b1` above.
+enc_both(base.fill.b1, r.fiSib32, 0x8b)
+enc_both(base.regfill.b1, r.rfi32, 0x8b)

 #
 # Float loads and stores.