diff --git a/lib/cretonne/meta/isa/intel/encodings.py b/lib/cretonne/meta/isa/intel/encodings.py index 6d79ca726e..10d89a33b4 100644 --- a/lib/cretonne/meta/isa/intel/encodings.py +++ b/lib/cretonne/meta/isa/intel/encodings.py @@ -120,6 +120,7 @@ enc_i32_i64(x86.sdivmodx, r.div, 0xf7, rrr=7) enc_i32_i64(x86.udivmodx, r.div, 0xf7, rrr=6) enc_i32_i64(base.copy, r.umr, 0x89) +enc_both(base.copy.b1, r.umr, 0x89) enc_i32_i64(base.regmove, r.rmov, 0x89) enc_both(base.regmove.b1, r.rmov, 0x89) @@ -188,66 +189,42 @@ I64.enc(base.ctz.i32, *r.urm(0xf3, 0x0f, 0xbc), isap=cfg.use_bmi1) # # Loads and stores. # -enc_i32_i64_ld_st(base.store, True, r.st, 0x89) -enc_i32_i64_ld_st(base.store, True, r.stDisp8, 0x89) -enc_i32_i64_ld_st(base.store, True, r.stDisp32, 0x89) +for recipe in [r.st, r.stDisp8, r.stDisp32]: + enc_i32_i64_ld_st(base.store, True, recipe, 0x89) + enc_i64(base.istore32.i64.any, recipe, 0x89) + enc_i32_i64_ld_st(base.istore16, False, recipe, 0x66, 0x89) + +# Byte stores are more complicated because the registers they can address +# depend on the presence of a REX prefix. The st*_abcd recipes fall back to +# the corresponding st* recipes when a REX prefix is applied. +for recipe in [r.st_abcd, r.stDisp8_abcd, r.stDisp32_abcd]: + enc_both(base.istore8.i32.any, recipe, 0x88) + enc_i64(base.istore8.i64.any, recipe, 0x88) enc_i32_i64(base.spill, r.spSib32, 0x89) enc_i32_i64(base.regspill, r.rsp32, 0x89) -enc_i64(base.istore32.i64.any, r.st, 0x89) -enc_i64(base.istore32.i64.any, r.stDisp8, 0x89) -enc_i64(base.istore32.i64.any, r.stDisp32, 0x89) +# Use a 32-bit write for spilling `b1` to avoid constraining the permitted +# registers. +# See MIN_SPILL_SLOT_SIZE which makes this safe. 
+enc_both(base.spill.b1, r.spSib32, 0x89) +enc_both(base.regspill.b1, r.rsp32, 0x89) -enc_i32_i64_ld_st(base.istore16, False, r.st, 0x66, 0x89) -enc_i32_i64_ld_st(base.istore16, False, r.stDisp8, 0x66, 0x89) -enc_i32_i64_ld_st(base.istore16, False, r.stDisp32, 0x66, 0x89) - -# Byte stores are more complicated because the registers they can address -# depends of the presence of a REX prefix -I32.enc(base.istore8.i32.any, *r.st_abcd(0x88)) -I64.enc(base.istore8.i32.any, *r.st_abcd(0x88)) -I64.enc(base.istore8.i64.any, *r.st_abcd(0x88)) -I64.enc(base.istore8.i64.any, *r.st.rex(0x88)) -I32.enc(base.istore8.i32.any, *r.stDisp8_abcd(0x88)) -I64.enc(base.istore8.i32.any, *r.stDisp8_abcd(0x88)) -I64.enc(base.istore8.i64.any, *r.stDisp8_abcd(0x88)) -I64.enc(base.istore8.i64.any, *r.stDisp8.rex(0x88)) -I32.enc(base.istore8.i32.any, *r.stDisp32_abcd(0x88)) -I64.enc(base.istore8.i32.any, *r.stDisp32_abcd(0x88)) -I64.enc(base.istore8.i64.any, *r.stDisp32_abcd(0x88)) -I64.enc(base.istore8.i64.any, *r.stDisp32.rex(0x88)) - -enc_i32_i64_ld_st(base.load, True, r.ld, 0x8b) -enc_i32_i64_ld_st(base.load, True, r.ldDisp8, 0x8b) -enc_i32_i64_ld_st(base.load, True, r.ldDisp32, 0x8b) +for recipe in [r.ld, r.ldDisp8, r.ldDisp32]: + enc_i32_i64_ld_st(base.load, True, recipe, 0x8b) + enc_i64(base.uload32.i64, recipe, 0x8b) + I64.enc(base.sload32.i64, *recipe.rex(0x63, w=1)) + enc_i32_i64_ld_st(base.uload16, True, recipe, 0x0f, 0xb7) + enc_i32_i64_ld_st(base.sload16, True, recipe, 0x0f, 0xbf) + enc_i32_i64_ld_st(base.uload8, True, recipe, 0x0f, 0xb6) + enc_i32_i64_ld_st(base.sload8, True, recipe, 0x0f, 0xbe) enc_i32_i64(base.fill, r.fiSib32, 0x8b) enc_i32_i64(base.regfill, r.rfi32, 0x8b) -enc_i64(base.uload32.i64, r.ld, 0x8b) -enc_i64(base.uload32.i64, r.ldDisp8, 0x8b) -enc_i64(base.uload32.i64, r.ldDisp32, 0x8b) - -I64.enc(base.sload32.i64, *r.ld.rex(0x63, w=1)) -I64.enc(base.sload32.i64, *r.ldDisp8.rex(0x63, w=1)) -I64.enc(base.sload32.i64, *r.ldDisp32.rex(0x63, w=1)) - 
-enc_i32_i64_ld_st(base.uload16, True, r.ld, 0x0f, 0xb7) -enc_i32_i64_ld_st(base.uload16, True, r.ldDisp8, 0x0f, 0xb7) -enc_i32_i64_ld_st(base.uload16, True, r.ldDisp32, 0x0f, 0xb7) - -enc_i32_i64_ld_st(base.sload16, True, r.ld, 0x0f, 0xbf) -enc_i32_i64_ld_st(base.sload16, True, r.ldDisp8, 0x0f, 0xbf) -enc_i32_i64_ld_st(base.sload16, True, r.ldDisp32, 0x0f, 0xbf) - -enc_i32_i64_ld_st(base.uload8, True, r.ld, 0x0f, 0xb6) -enc_i32_i64_ld_st(base.uload8, True, r.ldDisp8, 0x0f, 0xb6) -enc_i32_i64_ld_st(base.uload8, True, r.ldDisp32, 0x0f, 0xb6) - -enc_i32_i64_ld_st(base.sload8, True, r.ld, 0x0f, 0xbe) -enc_i32_i64_ld_st(base.sload8, True, r.ldDisp8, 0x0f, 0xbe) -enc_i32_i64_ld_st(base.sload8, True, r.ldDisp32, 0x0f, 0xbe) +# Load 32 bits from `b1` spill slots. See `spill.b1` above. +enc_both(base.fill.b1, r.fiSib32, 0x8b) +enc_both(base.regfill.b1, r.rfi32, 0x8b) # # Float loads and stores.