From 9d6821d6d999e62aae49526ef0eca66387c29c37 Mon Sep 17 00:00:00 2001 From: Benjamin Bouvier Date: Tue, 16 Oct 2018 00:43:38 +0200 Subject: [PATCH] Fix #335: Introduce variable size recipes and remove GPR_SAFE reg classes (#552) * Rename size to base_size and introduce a compute_size function; * Add infra to inspect in/outs registers when computing the size of an instruction; * Remove the GPR_SAFE_DEREF and GPR_ZERO_DEREF_SAFE register classes on x86 (fixes #335); --- .../regalloc/gpr-deref-safe-335.clif | 44 ++ lib/codegen/meta-python/cdsl/isa.py | 30 +- lib/codegen/meta-python/gen_encoding.py | 3 +- lib/codegen/meta-python/isa/riscv/recipes.py | 34 +- lib/codegen/meta-python/isa/x86/recipes.py | 470 +++++++++++------- lib/codegen/meta-python/isa/x86/registers.py | 7 - lib/codegen/src/binemit/relaxation.rs | 15 +- lib/codegen/src/binemit/shrink.rs | 6 +- lib/codegen/src/context.rs | 1 - lib/codegen/src/ir/function.rs | 21 +- lib/codegen/src/ir/stackslot.rs | 2 +- lib/codegen/src/isa/encoding.rs | 33 +- lib/codegen/src/isa/mod.rs | 4 +- lib/codegen/src/isa/riscv/enc_tables.rs | 2 +- lib/codegen/src/isa/x86/binemit.rs | 1 + lib/codegen/src/isa/x86/enc_tables.rs | 60 ++- lib/filetests/src/test_binemit.rs | 5 +- 17 files changed, 498 insertions(+), 240 deletions(-) create mode 100644 cranelift/filetests/regalloc/gpr-deref-safe-335.clif diff --git a/cranelift/filetests/regalloc/gpr-deref-safe-335.clif b/cranelift/filetests/regalloc/gpr-deref-safe-335.clif new file mode 100644 index 0000000000..8e8b2260cf --- /dev/null +++ b/cranelift/filetests/regalloc/gpr-deref-safe-335.clif @@ -0,0 +1,44 @@ +test regalloc +target x86_64 + +function u0:587() fast { +ebb0: + v97 = iconst.i32 0 + v169 = iconst.i32 0 + v1729 = iconst.i32 0 + jump ebb100(v97, v97, v97, v97, v97) + +ebb100(v1758: i32, v1784: i32, v1845: i32, v1856: i32, v1870: i32): + v1762 = iconst.i32 0 + v1769 = iconst.i32 0 + v1774 = iconst.i32 0 + v1864 = iconst.i32 0 + v1897 = iconst.i32 0 + jump ebb102(v1774, v1784, v1845, v1856, v1870, v1758, v1762, v169, v1729, v97, v169, v169, v169, v169) + +ebb102(v1785: i32, v1789: i32, v1843: i32, v1854: i32, v1868: i32, v1882: i32, v1890: i32, v1901: i32, v1921: i32, v1933: i32, v2058: i32, v2124: i32, v2236: i32, v2366: i32): + v1929 = iconst.i32 0 + v1943 = iconst.i32 0 + v1949 = iconst.i32 0 + jump ebb123(v1897, v1769) + +ebb123(v1950: i32, v1979: i32): + v1955 = iconst.i32 0 + brz v1955, ebb125 + jump ebb122(v1929, v1843, v1864, v2058, v1882, v1897, v1943, v1868, v2124, v1901) + +ebb125: + v1961 = iadd_imm.i32 v1949, 0 + v1952 = iconst.i32 0 + v1962 = iconst.i64 0 + v1963 = load.i32 v1962 + brz v1963, ebb123(v1952, v1961) + jump ebb127 + +ebb127: + v1966 = iconst.i32 0 + jump ebb122(v1963, v1966, v1966, v1966, v1966, v1966, v1966, v1966, v1966, v1966) + +ebb122(v1967: i32, v1971: i32, v1972: i32, v1978: i32, v2032: i32, v2041: i32, v2053: i32, v2076: i32, v2085: i32, v2096: i32): + trap user0 +} diff --git a/lib/codegen/meta-python/cdsl/isa.py b/lib/codegen/meta-python/cdsl/isa.py index 5c57c52fbd..90a9610d62 100644 --- a/lib/codegen/meta-python/cdsl/isa.py +++ b/lib/codegen/meta-python/cdsl/isa.py @@ -316,7 +316,8 @@ class EncRecipe(object): :param name: Short mnemonic name for this recipe. :param format: All encoded instructions must have this :py:class:`InstructionFormat`. - :param size: Number of bytes in the binary encoded instruction. + :param base_size: Base number of bytes in the binary encoded instruction. + :param compute_size: Function name to use when computing actual size. :param ins: Tuple of register constraints for value operands. :param outs: Tuple of register constraints for results. :param branch_range: `(origin, bits)` range for branches. @@ -328,22 +329,25 @@ class EncRecipe(object): def __init__( self, - name, # type: str - format, # type: InstructionFormat - size, # type: int - ins, # type: ConstraintSeq - outs, # type: ConstraintSeq - branch_range=None, # type: BranchRange - clobbers_flags=True, # type: bool - instp=None, # type: PredNode - isap=None, # type: PredNode - emit=None # type: str + name, # type: str + format, # type: InstructionFormat + base_size, # type: int + ins, # type: ConstraintSeq + outs, # type: ConstraintSeq + compute_size=None, # type: str + branch_range=None, # type: BranchRange + clobbers_flags=True, # type: bool + instp=None, # type: PredNode + isap=None, # type: PredNode + emit=None # type: str ): # type: (...) -> None self.name = name self.format = format - assert size >= 0 - self.size = size + assert base_size >= 0 + self.base_size = base_size + self.compute_size = compute_size if compute_size is not None \ + else 'base_size' self.branch_range = branch_range self.clobbers_flags = clobbers_flags self.instp = instp diff --git a/lib/codegen/meta-python/gen_encoding.py b/lib/codegen/meta-python/gen_encoding.py index 59875bf489..6c5d7367ed 100644 --- a/lib/codegen/meta-python/gen_encoding.py +++ b/lib/codegen/meta-python/gen_encoding.py @@ -832,7 +832,8 @@ def emit_recipe_sizing(isa, fmt): for r in isa.all_recipes: fmt.comment('Code size information for recipe {}:'.format(r.name)) with fmt.indented('RecipeSizing {', '},'): - fmt.format('bytes: {},', r.size) + fmt.format('base_size: {},', r.base_size) + fmt.format('compute_size: {},', r.compute_size) if r.branch_range: fmt.format( 'branch_range: ' diff --git a/lib/codegen/meta-python/isa/riscv/recipes.py b/lib/codegen/meta-python/isa/riscv/recipes.py index 170dd914d3..9808892b3c 100644 --- a/lib/codegen/meta-python/isa/riscv/recipes.py +++ b/lib/codegen/meta-python/isa/riscv/recipes.py @@ -93,33 +93,33 @@ def LUI(): # R-type 32-bit instructions: These are mostly binary arithmetic instructions. # The encbits are `opcode[6:2] | (funct3 << 5) | (funct7 << 8) R = EncRecipe( - 'R', Binary, size=4, ins=(GPR, GPR), outs=GPR, + 'R', Binary, base_size=4, ins=(GPR, GPR), outs=GPR, emit='put_r(bits, in_reg0, in_reg1, out_reg0, sink);') # R-type with an immediate shift amount instead of rs2. Rshamt = EncRecipe( - 'Rshamt', BinaryImm, size=4, ins=GPR, outs=GPR, + 'Rshamt', BinaryImm, base_size=4, ins=GPR, outs=GPR, emit='put_rshamt(bits, in_reg0, imm.into(), out_reg0, sink);') # R-type encoding of an integer comparison. Ricmp = EncRecipe( - 'Ricmp', IntCompare, size=4, ins=(GPR, GPR), outs=GPR, + 'Ricmp', IntCompare, base_size=4, ins=(GPR, GPR), outs=GPR, emit='put_r(bits, in_reg0, in_reg1, out_reg0, sink);') Ii = EncRecipe( - 'Ii', BinaryImm, size=4, ins=GPR, outs=GPR, + 'Ii', BinaryImm, base_size=4, ins=GPR, outs=GPR, instp=IsSignedInt(BinaryImm.imm, 12), emit='put_i(bits, in_reg0, imm.into(), out_reg0, sink);') # I-type instruction with a hardcoded %x0 rs1. Iz = EncRecipe( - 'Iz', UnaryImm, size=4, ins=(), outs=GPR, + 'Iz', UnaryImm, base_size=4, ins=(), outs=GPR, instp=IsSignedInt(UnaryImm.imm, 12), emit='put_i(bits, 0, imm.into(), out_reg0, sink);') # I-type encoding of an integer comparison. Iicmp = EncRecipe( - 'Iicmp', IntCompareImm, size=4, ins=GPR, outs=GPR, + 'Iicmp', IntCompareImm, base_size=4, ins=GPR, outs=GPR, instp=IsSignedInt(IntCompareImm.imm, 12), emit='put_i(bits, in_reg0, imm.into(), out_reg0, sink);') @@ -127,7 +127,7 @@ Iicmp = EncRecipe( # immediate offset. # The variable return values are not encoded. Iret = EncRecipe( - 'Iret', MultiAry, size=4, ins=(), outs=(), + 'Iret', MultiAry, base_size=4, ins=(), outs=(), emit=''' // Return instructions are always a jalr to %x1. // The return address is provided as a special-purpose link argument. @@ -142,7 +142,7 @@ Iret = EncRecipe( # I-type encoding for `jalr` as a call_indirect. Icall = EncRecipe( - 'Icall', CallIndirect, size=4, ins=GPR, outs=(), + 'Icall', CallIndirect, base_size=4, ins=GPR, outs=(), emit=''' // call_indirect instructions are jalr with rd=%x1. put_i( @@ -157,23 +157,23 @@ Icall = EncRecipe( # Copy of a GPR is implemented as addi x, 0. Icopy = EncRecipe( - 'Icopy', Unary, size=4, ins=GPR, outs=GPR, + 'Icopy', Unary, base_size=4, ins=GPR, outs=GPR, emit='put_i(bits, in_reg0, 0, out_reg0, sink);') # Same for a GPR regmove. Irmov = EncRecipe( - 'Irmov', RegMove, size=4, ins=GPR, outs=(), + 'Irmov', RegMove, base_size=4, ins=GPR, outs=(), emit='put_i(bits, src, 0, dst, sink);') # U-type instructions have a 20-bit immediate that targets bits 12-31. U = EncRecipe( - 'U', UnaryImm, size=4, ins=(), outs=GPR, + 'U', UnaryImm, base_size=4, ins=(), outs=GPR, instp=IsSignedInt(UnaryImm.imm, 32, 12), emit='put_u(bits, imm.into(), out_reg0, sink);') # UJ-type unconditional branch instructions. UJ = EncRecipe( - 'UJ', Jump, size=4, ins=(), outs=(), branch_range=(0, 21), + 'UJ', Jump, base_size=4, ins=(), outs=(), branch_range=(0, 21), emit=''' let dest = i64::from(func.offsets[destination]); let disp = dest - i64::from(sink.offset()); @@ -181,7 +181,7 @@ UJ = EncRecipe( ''') UJcall = EncRecipe( - 'UJcall', Call, size=4, ins=(), outs=(), + 'UJcall', Call, base_size=4, ins=(), outs=(), emit=''' sink.reloc_external(Reloc::RiscvCall, &func.dfg.ext_funcs[func_ref].name, @@ -192,7 +192,7 @@ UJcall = EncRecipe( # SB-type branch instructions. SB = EncRecipe( - 'SB', BranchIcmp, size=4, + 'SB', BranchIcmp, base_size=4, ins=(GPR, GPR), outs=(), branch_range=(0, 13), emit=''' @@ -203,7 +203,7 @@ SB = EncRecipe( # SB-type branch instruction with rs2 fixed to zero. SBzero = EncRecipe( - 'SBzero', Branch, size=4, + 'SBzero', Branch, base_size=4, ins=(GPR), outs=(), branch_range=(0, 13), emit=''' @@ -214,12 +214,12 @@ SBzero = EncRecipe( # Spill of a GPR. GPsp = EncRecipe( - 'GPsp', Unary, size=4, + 'GPsp', Unary, base_size=4, ins=GPR, outs=Stack(GPR), emit='unimplemented!();') # Fill of a GPR. GPfi = EncRecipe( - 'GPfi', Unary, size=4, + 'GPfi', Unary, base_size=4, ins=Stack(GPR), outs=GPR, emit='unimplemented!();') diff --git a/lib/codegen/meta-python/isa/x86/recipes.py b/lib/codegen/meta-python/isa/x86/recipes.py index 8a204728d9..4c495807b7 100644 --- a/lib/codegen/meta-python/isa/x86/recipes.py +++ b/lib/codegen/meta-python/isa/x86/recipes.py @@ -19,8 +19,8 @@ from base.formats import Ternary, FuncAddr, UnaryGlobalValue from base.formats import RegMove, RegSpill, RegFill, CopySpecial from base.formats import LoadComplex, StoreComplex from base.formats import StackLoad -from .registers import GPR, ABCD, FPR, GPR_DEREF_SAFE, GPR_ZERO_DEREF_SAFE -from .registers import GPR8, FPR8, GPR8_DEREF_SAFE, GPR8_ZERO_DEREF_SAFE, FLAG +from .registers import GPR, ABCD, FPR +from .registers import GPR8, FPR8, FLAG from .registers import StackGPR32, StackFPR32 from .defs import supported_floatccs from .settings import use_sse41 @@ -113,8 +113,6 @@ def replace_put_op(emit, prefix): # Register class mapping for no-REX instructions. NOREX_MAP = { GPR: GPR8, - GPR_DEREF_SAFE: GPR8_DEREF_SAFE, - GPR_ZERO_DEREF_SAFE: GPR8_ZERO_DEREF_SAFE, FPR: FPR8 } @@ -156,7 +154,7 @@ class TailRecipe: self, name, # type: str format, # type: InstructionFormat - size, # type: int + base_size, # type: int ins, # type: ConstraintSeq outs, # type: ConstraintSeq branch_range=None, # type: int @@ -165,12 +163,13 @@ class TailRecipe: isap=None, # type: PredNode when_prefixed=None, # type: TailRecipe requires_prefix=False, # type: bool - emit=None # type: str + emit=None, # type: str + compute_size=None # type: str ): # type: (...) -> None self.name = name self.format = format - self.size = size + self.base_size = base_size self.ins = ins self.outs = outs self.branch_range = branch_range @@ -180,6 +179,7 @@ class TailRecipe: self.when_prefixed = when_prefixed self.requires_prefix = requires_prefix self.emit = emit + self.compute_size = compute_size # Cached recipes, keyed by name prefix. self.recipes = dict() # type: Dict[str, EncRecipe] @@ -194,25 +194,26 @@ class TailRecipe: rrr = kwargs.get('rrr', 0) w = kwargs.get('w', 0) name, bits = decode_ops(ops, rrr, w) - size = len(ops) + self.size + base_size = len(ops) + self.base_size # All branch ranges are relative to the end of the instruction. branch_range = None # type BranchRange if self.branch_range is not None: - branch_range = (size, self.branch_range) + branch_range = (base_size, self.branch_range) if name not in self.recipes: recipe = EncRecipe( name + self.name, self.format, - size, + base_size, ins=self.ins, outs=self.outs, branch_range=branch_range, clobbers_flags=self.clobbers_flags, instp=self.instp, isap=self.isap, - emit=replace_put_op(self.emit, name)) + emit=replace_put_op(self.emit, name), + compute_size=self.compute_size) recipe.ins = map_regs_norex(recipe.ins) recipe.outs = map_regs_norex(recipe.outs) @@ -237,25 +238,26 @@ class TailRecipe: w = kwargs.get('w', 0) name, bits = decode_ops(ops, rrr, w) name = 'Rex' + name - size = 1 + len(ops) + self.size + base_size = 1 + len(ops) + self.base_size # All branch ranges are relative to the end of the instruction. branch_range = None # type BranchRange if self.branch_range is not None: - branch_range = (size, self.branch_range) + branch_range = (base_size, self.branch_range) if name not in self.recipes: recipe = EncRecipe( name + self.name, self.format, - size, + base_size, ins=self.ins, outs=self.outs, branch_range=branch_range, clobbers_flags=self.clobbers_flags, instp=self.instp, isap=self.isap, - emit=replace_put_op(self.emit, name)) + emit=replace_put_op(self.emit, name), + compute_size=self.compute_size) self.recipes[name] = recipe return (self.recipes[name], bits) @@ -291,11 +293,11 @@ def valid_scale(iform): # A null unary instruction that takes a GPR register. Can be used for identity # copies and no-op conversions. -null = EncRecipe('null', Unary, size=0, ins=GPR, outs=0, emit='') +null = EncRecipe('null', Unary, base_size=0, ins=GPR, outs=0, emit='') # XX opcode, no ModR/M. trap = TailRecipe( - 'trap', Trap, size=0, ins=(), outs=(), + 'trap', Trap, base_size=0, ins=(), outs=(), emit=''' sink.trap(code, func.srclocs[inst]); PUT_OP(bits, BASE_REX, sink); @@ -303,7 +305,7 @@ trap = TailRecipe( # Macro: conditional jump over a ud2. trapif = EncRecipe( - 'trapif', IntCondTrap, size=4, ins=FLAG.rflags, outs=(), + 'trapif', IntCondTrap, base_size=4, ins=FLAG.rflags, outs=(), clobbers_flags=False, emit=''' // Jump over a 2-byte ud2. @@ -316,7 +318,7 @@ trapif = EncRecipe( ''') trapff = EncRecipe( - 'trapff', FloatCondTrap, size=4, ins=FLAG.rflags, outs=(), + 'trapff', FloatCondTrap, base_size=4, ins=FLAG.rflags, outs=(), clobbers_flags=False, instp=floatccs(FloatCondTrap), emit=''' @@ -332,7 +334,7 @@ trapff = EncRecipe( # XX /r rr = TailRecipe( - 'rr', Binary, size=1, ins=(GPR, GPR), outs=0, + 'rr', Binary, base_size=1, ins=(GPR, GPR), outs=0, emit=''' PUT_OP(bits, rex2(in_reg0, in_reg1), sink); modrm_rr(in_reg0, in_reg1, sink); @@ -340,7 +342,7 @@ rr = TailRecipe( # XX /r with operands swapped. (RM form). rrx = TailRecipe( - 'rrx', Binary, size=1, ins=(GPR, GPR), outs=0, + 'rrx', Binary, base_size=1, ins=(GPR, GPR), outs=0, emit=''' PUT_OP(bits, rex2(in_reg1, in_reg0), sink); modrm_rr(in_reg1, in_reg0, sink); @@ -348,7 +350,7 @@ rrx = TailRecipe( # XX /r with FPR ins and outs. A form. fa = TailRecipe( - 'fa', Binary, size=1, ins=(FPR, FPR), outs=0, + 'fa', Binary, base_size=1, ins=(FPR, FPR), outs=0, emit=''' PUT_OP(bits, rex2(in_reg1, in_reg0), sink); modrm_rr(in_reg1, in_reg0, sink); @@ -356,7 +358,7 @@ fa = TailRecipe( # XX /r with FPR ins and outs. A form with input operands swapped. fax = TailRecipe( - 'fax', Binary, size=1, ins=(FPR, FPR), outs=1, + 'fax', Binary, base_size=1, ins=(FPR, FPR), outs=1, emit=''' PUT_OP(bits, rex2(in_reg0, in_reg1), sink); modrm_rr(in_reg0, in_reg1, sink); @@ -364,7 +366,7 @@ fax = TailRecipe( # XX /n for a unary operation with extension bits. ur = TailRecipe( - 'ur', Unary, size=1, ins=GPR, outs=0, + 'ur', Unary, base_size=1, ins=GPR, outs=0, emit=''' PUT_OP(bits, rex1(in_reg0), sink); modrm_r_bits(in_reg0, bits, sink); @@ -373,7 +375,7 @@ ur = TailRecipe( # XX /r, but for a unary operator with separate input/output register, like # copies. MR form, preserving flags. umr = TailRecipe( - 'umr', Unary, size=1, ins=GPR, outs=GPR, + 'umr', Unary, base_size=1, ins=GPR, outs=GPR, clobbers_flags=False, emit=''' PUT_OP(bits, rex2(out_reg0, in_reg0), sink); @@ -382,7 +384,7 @@ umr = TailRecipe( # Same as umr, but with FPR -> GPR registers. rfumr = TailRecipe( - 'rfumr', Unary, size=1, ins=FPR, outs=GPR, + 'rfumr', Unary, base_size=1, ins=FPR, outs=GPR, clobbers_flags=False, emit=''' PUT_OP(bits, rex2(out_reg0, in_reg0), sink); @@ -392,7 +394,7 @@ rfumr = TailRecipe( # XX /r, but for a unary operator with separate input/output register. # RM form. Clobbers FLAGS. urm = TailRecipe( - 'urm', Unary, size=1, ins=GPR, outs=GPR, + 'urm', Unary, base_size=1, ins=GPR, outs=GPR, emit=''' PUT_OP(bits, rex2(in_reg0, out_reg0), sink); modrm_rr(in_reg0, out_reg0, sink); @@ -400,7 +402,7 @@ urm = TailRecipe( # XX /r. Same as urm, but doesn't clobber FLAGS. urm_noflags = TailRecipe( - 'urm_noflags', Unary, size=1, ins=GPR, outs=GPR, + 'urm_noflags', Unary, base_size=1, ins=GPR, outs=GPR, clobbers_flags=False, emit=''' PUT_OP(bits, rex2(in_reg0, out_reg0), sink); @@ -409,7 +411,7 @@ urm_noflags = TailRecipe( # XX /r. Same as urm_noflags, but input limited to ABCD. urm_noflags_abcd = TailRecipe( - 'urm_noflags_abcd', Unary, size=1, ins=ABCD, outs=GPR, + 'urm_noflags_abcd', Unary, base_size=1, ins=ABCD, outs=GPR, when_prefixed=urm_noflags, clobbers_flags=False, emit=''' @@ -419,7 +421,7 @@ urm_noflags_abcd = TailRecipe( # XX /r, RM form, FPR -> FPR. furm = TailRecipe( - 'furm', Unary, size=1, ins=FPR, outs=FPR, + 'furm', Unary, base_size=1, ins=FPR, outs=FPR, clobbers_flags=False, emit=''' PUT_OP(bits, rex2(in_reg0, out_reg0), sink); @@ -428,7 +430,7 @@ furm = TailRecipe( # XX /r, RM form, GPR -> FPR. frurm = TailRecipe( - 'frurm', Unary, size=1, ins=GPR, outs=FPR, + 'frurm', Unary, base_size=1, ins=GPR, outs=FPR, clobbers_flags=False, emit=''' PUT_OP(bits, rex2(in_reg0, out_reg0), sink); @@ -437,7 +439,7 @@ frurm = TailRecipe( # XX /r, RM form, FPR -> GPR. rfurm = TailRecipe( - 'rfurm', Unary, size=1, ins=FPR, outs=GPR, + 'rfurm', Unary, base_size=1, ins=FPR, outs=GPR, clobbers_flags=False, emit=''' PUT_OP(bits, rex2(in_reg0, out_reg0), sink); @@ -446,7 +448,7 @@ rfurm = TailRecipe( # XX /r, RMI form for one of the roundXX SSE 4.1 instructions. furmi_rnd = TailRecipe( - 'furmi_rnd', Unary, size=2, ins=FPR, outs=FPR, + 'furmi_rnd', Unary, base_size=2, ins=FPR, outs=FPR, isap=use_sse41, emit=''' PUT_OP(bits, rex2(in_reg0, out_reg0), sink); @@ -462,7 +464,7 @@ furmi_rnd = TailRecipe( # XX /r, for regmove instructions. rmov = TailRecipe( - 'rmov', RegMove, size=1, ins=GPR, outs=(), + 'rmov', RegMove, base_size=1, ins=GPR, outs=(), clobbers_flags=False, emit=''' PUT_OP(bits, rex2(dst, src), sink); @@ -471,7 +473,7 @@ rmov = TailRecipe( # XX /r, for regmove instructions (FPR version, RM encoded). frmov = TailRecipe( - 'frmov', RegMove, size=1, ins=FPR, outs=(), + 'frmov', RegMove, base_size=1, ins=FPR, outs=(), clobbers_flags=False, emit=''' PUT_OP(bits, rex2(src, dst), sink); @@ -480,7 +482,7 @@ frmov = TailRecipe( # XX /n with one arg in %rcx, for shifts. rc = TailRecipe( - 'rc', Binary, size=1, ins=(GPR, GPR.rcx), outs=0, + 'rc', Binary, base_size=1, ins=(GPR, GPR.rcx), outs=0, emit=''' PUT_OP(bits, rex1(in_reg0), sink); modrm_r_bits(in_reg0, bits, sink); @@ -488,7 +490,7 @@ rc = TailRecipe( # XX /n for division: inputs in %rax, %rdx, r. Outputs in %rax, %rdx. div = TailRecipe( - 'div', Ternary, size=1, + 'div', Ternary, base_size=1, ins=(GPR.rax, GPR.rdx, GPR), outs=(GPR.rax, GPR.rdx), emit=''' sink.trap(TrapCode::IntegerDivisionByZero, func.srclocs[inst]); @@ -498,7 +500,7 @@ div = TailRecipe( # XX /n for {s,u}mulx: inputs in %rax, r. Outputs in %rdx(hi):%rax(lo) mulx = TailRecipe( - 'mulx', Binary, size=1, + 'mulx', Binary, base_size=1, ins=(GPR.rax, GPR), outs=(GPR.rax, GPR.rdx), emit=''' PUT_OP(bits, rex1(in_reg1), sink); @@ -507,7 +509,7 @@ mulx = TailRecipe( # XX /n ib with 8-bit immediate sign-extended. r_ib = TailRecipe( - 'r_ib', BinaryImm, size=2, ins=GPR, outs=0, + 'r_ib', BinaryImm, base_size=2, ins=GPR, outs=0, instp=IsSignedInt(BinaryImm.imm, 8), emit=''' PUT_OP(bits, rex1(in_reg0), sink); @@ -518,7 +520,7 @@ r_ib = TailRecipe( # XX /n id with 32-bit immediate sign-extended. r_id = TailRecipe( - 'r_id', BinaryImm, size=5, ins=GPR, outs=0, + 'r_id', BinaryImm, base_size=5, ins=GPR, outs=0, instp=IsSignedInt(BinaryImm.imm, 32), emit=''' PUT_OP(bits, rex1(in_reg0), sink); @@ -529,7 +531,7 @@ r_id = TailRecipe( # XX /n id with 32-bit immediate sign-extended. UnaryImm version. u_id = TailRecipe( - 'u_id', UnaryImm, size=5, ins=(), outs=GPR, + 'u_id', UnaryImm, base_size=5, ins=(), outs=GPR, instp=IsSignedInt(UnaryImm.imm, 32), emit=''' PUT_OP(bits, rex1(out_reg0), sink); @@ -540,7 +542,7 @@ u_id = TailRecipe( # XX+rd id unary with 32-bit immediate. Note no recipe predicate. pu_id = TailRecipe( - 'pu_id', UnaryImm, size=4, ins=(), outs=GPR, + 'pu_id', UnaryImm, base_size=4, ins=(), outs=GPR, emit=''' // The destination register is encoded in the low bits of the opcode. // No ModR/M. @@ -551,7 +553,7 @@ pu_id = TailRecipe( # XX+rd id unary with bool immediate. Note no recipe predicate. pu_id_bool = TailRecipe( - 'pu_id_bool', UnaryBool, size=4, ins=(), outs=GPR, + 'pu_id_bool', UnaryBool, base_size=4, ins=(), outs=GPR, emit=''' // The destination register is encoded in the low bits of the opcode. // No ModR/M. @@ -562,7 +564,7 @@ pu_id_bool = TailRecipe( # XX+rd iq unary with 64-bit immediate. pu_iq = TailRecipe( - 'pu_iq', UnaryImm, size=8, ins=(), outs=GPR, + 'pu_iq', UnaryImm, base_size=8, ins=(), outs=GPR, emit=''' PUT_OP(bits | (out_reg0 & 7), rex1(out_reg0), sink); let imm: i64 = imm.into(); @@ -571,7 +573,7 @@ pu_iq = TailRecipe( # XX /n Unary with floating point 32-bit immediate equal to zero. f32imm_z = TailRecipe( - 'f32imm_z', UnaryIeee32, size=1, ins=(), outs=FPR, + 'f32imm_z', UnaryIeee32, base_size=1, ins=(), outs=FPR, instp=IsZero32BitFloat(UnaryIeee32.imm), emit=''' PUT_OP(bits, rex2(out_reg0, out_reg0), sink); @@ -580,7 +582,7 @@ f32imm_z = TailRecipe( # XX /n Unary with floating point 64-bit immediate equal to zero. f64imm_z = TailRecipe( - 'f64imm_z', UnaryIeee64, size=1, ins=(), outs=FPR, + 'f64imm_z', UnaryIeee64, base_size=1, ins=(), outs=FPR, instp=IsZero64BitFloat(UnaryIeee64.imm), emit=''' PUT_OP(bits, rex2(out_reg0, out_reg0), sink); @@ -588,21 +590,21 @@ f64imm_z = TailRecipe( ''') pushq = TailRecipe( - 'pushq', Unary, size=0, ins=GPR, outs=(), + 'pushq', Unary, base_size=0, ins=GPR, outs=(), emit=''' sink.trap(TrapCode::StackOverflow, func.srclocs[inst]); PUT_OP(bits | (in_reg0 & 7), rex1(in_reg0), sink); ''') popq = TailRecipe( - 'popq', NullAry, size=0, ins=(), outs=GPR, + 'popq', NullAry, base_size=0, ins=(), outs=GPR, emit=''' PUT_OP(bits | (out_reg0 & 7), rex1(out_reg0), sink); ''') # XX /r, for regmove instructions. copysp = TailRecipe( - 'copysp', CopySpecial, size=1, ins=(), outs=(), + 'copysp', CopySpecial, base_size=1, ins=(), outs=(), clobbers_flags=False, emit=''' PUT_OP(bits, rex2(dst, src), sink); @@ -610,14 +612,14 @@ copysp = TailRecipe( ''') adjustsp = TailRecipe( - 'adjustsp', Unary, size=1, ins=(GPR), outs=(), + 'adjustsp', Unary, base_size=1, ins=(GPR), outs=(), emit=''' PUT_OP(bits, rex2(RU::rsp.into(), in_reg0), sink); modrm_rr(RU::rsp.into(), in_reg0, sink); ''') adjustsp_ib = TailRecipe( - 'adjustsp_ib', UnaryImm, size=2, ins=(), outs=(), + 'adjustsp_ib', UnaryImm, base_size=2, ins=(), outs=(), instp=IsSignedInt(UnaryImm.imm, 8), emit=''' PUT_OP(bits, rex1(RU::rsp.into()), sink); @@ -627,7 +629,7 @@ adjustsp_ib = TailRecipe( ''') adjustsp_id = TailRecipe( - 'adjustsp_id', UnaryImm, size=5, ins=(), outs=(), + 'adjustsp_id', UnaryImm, base_size=5, ins=(), outs=(), instp=IsSignedInt(UnaryImm.imm, 32), emit=''' PUT_OP(bits, rex1(RU::rsp.into()), sink); @@ -639,7 +641,7 @@ adjustsp_id = TailRecipe( # XX+rd id with Abs4 function relocation. fnaddr4 = TailRecipe( - 'fnaddr4', FuncAddr, size=4, ins=(), outs=GPR, + 'fnaddr4', FuncAddr, base_size=4, ins=(), outs=GPR, emit=''' PUT_OP(bits | (out_reg0 & 7), rex1(out_reg0), sink); sink.reloc_external(Reloc::Abs4, @@ -650,7 +652,7 @@ fnaddr4 = TailRecipe( # XX+rd iq with Abs8 function relocation. fnaddr8 = TailRecipe( - 'fnaddr8', FuncAddr, size=8, ins=(), outs=GPR, + 'fnaddr8', FuncAddr, base_size=8, ins=(), outs=GPR, emit=''' PUT_OP(bits | (out_reg0 & 7), rex1(out_reg0), sink); sink.reloc_external(Reloc::Abs8, @@ -661,7 +663,7 @@ fnaddr8 = TailRecipe( # Similar to fnaddr4, but writes !0 (this is used by BaldrMonkey). allones_fnaddr4 = TailRecipe( - 'allones_fnaddr4', FuncAddr, size=4, ins=(), outs=GPR, + 'allones_fnaddr4', FuncAddr, base_size=4, ins=(), outs=GPR, emit=''' PUT_OP(bits | (out_reg0 & 7), rex1(out_reg0), sink); sink.reloc_external(Reloc::Abs4, @@ -673,7 +675,7 @@ allones_fnaddr4 = TailRecipe( # Similar to fnaddr8, but writes !0 (this is used by BaldrMonkey). allones_fnaddr8 = TailRecipe( - 'allones_fnaddr8', FuncAddr, size=8, ins=(), outs=GPR, + 'allones_fnaddr8', FuncAddr, base_size=8, ins=(), outs=GPR, emit=''' PUT_OP(bits | (out_reg0 & 7), rex1(out_reg0), sink); sink.reloc_external(Reloc::Abs8, @@ -684,7 +686,7 @@ allones_fnaddr8 = TailRecipe( ''') pcrel_fnaddr8 = TailRecipe( - 'pcrel_fnaddr8', FuncAddr, size=5, ins=(), outs=GPR, + 'pcrel_fnaddr8', FuncAddr, base_size=5, ins=(), outs=GPR, # rex2 gets passed 0 for r/m register because the upper bit of # r/m doesnt get decoded when in rip-relative addressing mode. emit=''' @@ -699,7 +701,7 @@ pcrel_fnaddr8 = TailRecipe( ''') got_fnaddr8 = TailRecipe( - 'got_fnaddr8', FuncAddr, size=5, ins=(), outs=GPR, + 'got_fnaddr8', FuncAddr, base_size=5, ins=(), outs=GPR, # rex2 gets passed 0 for r/m register because the upper bit of # r/m doesnt get decoded when in rip-relative addressing mode. emit=''' @@ -716,7 +718,7 @@ got_fnaddr8 = TailRecipe( # XX+rd id with Abs4 globalsym relocation. gvaddr4 = TailRecipe( - 'gvaddr4', UnaryGlobalValue, size=4, ins=(), outs=GPR, + 'gvaddr4', UnaryGlobalValue, base_size=4, ins=(), outs=GPR, emit=''' PUT_OP(bits | (out_reg0 & 7), rex1(out_reg0), sink); sink.reloc_external(Reloc::Abs4, @@ -727,7 +729,7 @@ gvaddr4 = TailRecipe( # XX+rd iq with Abs8 globalsym relocation. gvaddr8 = TailRecipe( - 'gvaddr8', UnaryGlobalValue, size=8, ins=(), outs=GPR, + 'gvaddr8', UnaryGlobalValue, base_size=8, ins=(), outs=GPR, emit=''' PUT_OP(bits | (out_reg0 & 7), rex1(out_reg0), sink); sink.reloc_external(Reloc::Abs8, @@ -738,7 +740,7 @@ gvaddr8 = TailRecipe( # XX+rd iq with PCRel4 globalsym relocation. pcrel_gvaddr8 = TailRecipe( - 'pcrel_gvaddr8', UnaryGlobalValue, size=5, ins=(), outs=GPR, + 'pcrel_gvaddr8', UnaryGlobalValue, base_size=5, ins=(), outs=GPR, emit=''' PUT_OP(bits, rex2(0, out_reg0), sink); modrm_rm(5, out_reg0, sink); @@ -752,7 +754,7 @@ pcrel_gvaddr8 = TailRecipe( # XX+rd iq with Abs8 globalsym relocation. got_gvaddr8 = TailRecipe( - 'got_gvaddr8', UnaryGlobalValue, size=5, ins=(), outs=GPR, + 'got_gvaddr8', UnaryGlobalValue, base_size=5, ins=(), outs=GPR, emit=''' PUT_OP(bits, rex2(0, out_reg0), sink); modrm_rm(5, out_reg0, sink); @@ -771,7 +773,7 @@ got_gvaddr8 = TailRecipe( # spaddr4_id = TailRecipe( - 'spaddr4_id', StackLoad, size=6, ins=(), outs=GPR, + 'spaddr4_id', StackLoad, base_size=6, ins=(), outs=GPR, emit=''' let sp = StackRef::sp(stack_slot, &func.stack_slots); let base = stk_base(sp.base); @@ -783,7 +785,7 @@ spaddr4_id = TailRecipe( ''') spaddr8_id = TailRecipe( - 'spaddr8_id', StackLoad, size=6, ins=(), outs=GPR, + 'spaddr8_id', StackLoad, base_size=6, ins=(), outs=GPR, emit=''' let sp = StackRef::sp(stack_slot, &func.stack_slots); let base = stk_base(sp.base); @@ -801,37 +803,50 @@ spaddr8_id = TailRecipe( # XX /r register-indirect store with no offset. st = TailRecipe( - 'st', Store, size=1, ins=(GPR, GPR_ZERO_DEREF_SAFE), outs=(), + 'st', Store, base_size=1, ins=(GPR, GPR), outs=(), instp=IsEqual(Store.offset, 0), clobbers_flags=False, + compute_size="size_plus_maybe_offset_for_in_reg_1", emit=''' if !flags.notrap() { sink.trap(TrapCode::HeapOutOfBounds, func.srclocs[inst]); } PUT_OP(bits, rex2(in_reg1, in_reg0), sink); - modrm_rm(in_reg1, in_reg0, sink); + if needs_offset(in_reg1) { + modrm_disp8(in_reg1, in_reg0, sink); + sink.put1(0); + } else { + modrm_rm(in_reg1, in_reg0, sink); + } ''') # XX /r register-indirect store with index and no offset. stWithIndex = TailRecipe( - 'stWithIndex', StoreComplex, size=2, - ins=(GPR, GPR_ZERO_DEREF_SAFE, GPR_DEREF_SAFE), + 'stWithIndex', StoreComplex, base_size=2, + ins=(GPR, GPR, GPR), outs=(), instp=IsEqual(StoreComplex.offset, 0), clobbers_flags=False, + compute_size="size_plus_maybe_offset_for_in_reg_1", emit=''' if !flags.notrap() { sink.trap(TrapCode::HeapOutOfBounds, func.srclocs[inst]); } PUT_OP(bits, rex3(in_reg1, in_reg0, in_reg2), sink); - modrm_sib(in_reg0, sink); - sib(0, in_reg2, in_reg1, sink); + if needs_offset(in_reg1) { + modrm_sib_disp8(in_reg0, sink); + sib(0, in_reg2, in_reg1, sink); + sink.put1(0); + } else { + modrm_sib(in_reg0, sink); + sib(0, in_reg2, in_reg1, sink); + } ''') # XX /r register-indirect store with no offset. # Only ABCD allowed for stored value. This is for byte stores with no REX. st_abcd = TailRecipe( - 'st_abcd', Store, size=1, ins=(ABCD, GPR), outs=(), + 'st_abcd', Store, base_size=1, ins=(ABCD, GPR), outs=(), instp=IsEqual(Store.offset, 0), when_prefixed=st, clobbers_flags=False, @@ -846,66 +861,92 @@ st_abcd = TailRecipe( # XX /r register-indirect store with index and no offset. # Only ABCD allowed for stored value. This is for byte stores with no REX. stWithIndex_abcd = TailRecipe( - 'stWithIndex_abcd', StoreComplex, size=2, - ins=(ABCD, GPR_ZERO_DEREF_SAFE, GPR_DEREF_SAFE), + 'stWithIndex_abcd', StoreComplex, base_size=2, + ins=(ABCD, GPR, GPR), outs=(), instp=IsEqual(StoreComplex.offset, 0), clobbers_flags=False, + compute_size="size_plus_maybe_offset_for_in_reg_1", emit=''' if !flags.notrap() { sink.trap(TrapCode::HeapOutOfBounds, func.srclocs[inst]); } PUT_OP(bits, rex3(in_reg1, in_reg0, in_reg2), sink); - modrm_sib(in_reg0, sink); - sib(0, in_reg2, in_reg1, sink); + if needs_offset(in_reg1) { + modrm_sib_disp8(in_reg0, sink); + sib(0, in_reg2, in_reg1, sink); + sink.put1(0); + } else { + modrm_sib(in_reg0, sink); + sib(0, in_reg2, in_reg1, sink); + } ''') # XX /r register-indirect store of FPR with no offset. fst = TailRecipe( - 'fst', Store, size=1, ins=(FPR, GPR_ZERO_DEREF_SAFE), outs=(), + 'fst', Store, base_size=1, ins=(FPR, GPR), outs=(), instp=IsEqual(Store.offset, 0), clobbers_flags=False, + compute_size="size_plus_maybe_offset_for_in_reg_1", emit=''' if !flags.notrap() { sink.trap(TrapCode::HeapOutOfBounds, func.srclocs[inst]); } PUT_OP(bits, rex2(in_reg1, in_reg0), sink); - modrm_rm(in_reg1, in_reg0, sink); + if needs_offset(in_reg1) { + modrm_disp8(in_reg1, in_reg0, sink); + sink.put1(0); + } else { + modrm_rm(in_reg1, in_reg0, sink); + } ''') # XX /r register-indirect store with index and no offset of FPR. fstWithIndex = TailRecipe( - 'fstWithIndex', StoreComplex, size=2, - ins=(FPR, GPR_ZERO_DEREF_SAFE, GPR_DEREF_SAFE), outs=(), + 'fstWithIndex', StoreComplex, base_size=2, + ins=(FPR, GPR, GPR), outs=(), instp=IsEqual(StoreComplex.offset, 0), clobbers_flags=False, + compute_size="size_plus_maybe_offset_for_in_reg_1", emit=''' if !flags.notrap() { sink.trap(TrapCode::HeapOutOfBounds, func.srclocs[inst]); } PUT_OP(bits, rex3(in_reg1, in_reg0, in_reg2), sink); - modrm_sib(in_reg0, sink); - sib(0, in_reg2, in_reg1, sink); + if needs_offset(in_reg1) { + modrm_sib_disp8(in_reg0, sink); + sib(0, in_reg2, in_reg1, sink); + sink.put1(0); + } else { + modrm_sib(in_reg0, sink); + sib(0, in_reg2, in_reg1, sink); + } ''') # XX /r register-indirect store with 8-bit offset. stDisp8 = TailRecipe( - 'stDisp8', Store, size=2, ins=(GPR, GPR_DEREF_SAFE), outs=(), + 'stDisp8', Store, base_size=2, ins=(GPR, GPR), outs=(), instp=IsSignedInt(Store.offset, 8), clobbers_flags=False, + compute_size="size_plus_maybe_sib_for_in_reg_1", emit=''' if !flags.notrap() { sink.trap(TrapCode::HeapOutOfBounds, func.srclocs[inst]); } PUT_OP(bits, rex2(in_reg1, in_reg0), sink); - modrm_disp8(in_reg1, in_reg0, sink); + if needs_sib_byte(in_reg1) { + modrm_sib_disp8(in_reg0, sink); + sib_noindex(in_reg1, sink); + } else { + modrm_disp8(in_reg1, in_reg0, sink); + } let offset: i32 = offset.into(); sink.put1(offset as u8); ''') # XX /r register-indirect store with index and 8-bit offset. stWithIndexDisp8 = TailRecipe( - 'stWithIndexDisp8', StoreComplex, size=3, - ins=(GPR, GPR, GPR_DEREF_SAFE), + 'stWithIndexDisp8', StoreComplex, base_size=3, + ins=(GPR, GPR, GPR), outs=(), instp=IsSignedInt(StoreComplex.offset, 8), clobbers_flags=False, @@ -923,7 +964,7 @@ stWithIndexDisp8 = TailRecipe( # XX /r register-indirect store with 8-bit offset. # Only ABCD allowed for stored value. This is for byte stores with no REX. stDisp8_abcd = TailRecipe( - 'stDisp8_abcd', Store, size=2, ins=(ABCD, GPR), outs=(), + 'stDisp8_abcd', Store, base_size=2, ins=(ABCD, GPR), outs=(), instp=IsSignedInt(Store.offset, 8), when_prefixed=stDisp8, clobbers_flags=False, @@ -940,8 +981,8 @@ stDisp8_abcd = TailRecipe( # XX /r register-indirect store with index and 8-bit offset. # Only ABCD allowed for stored value. This is for byte stores with no REX. stWithIndexDisp8_abcd = TailRecipe( - 'stWithIndexDisp8_abcd', StoreComplex, size=3, - ins=(ABCD, GPR, GPR_DEREF_SAFE), + 'stWithIndexDisp8_abcd', StoreComplex, base_size=3, + ins=(ABCD, GPR, GPR), outs=(), instp=IsSignedInt(StoreComplex.offset, 8), clobbers_flags=False, @@ -958,23 +999,29 @@ stWithIndexDisp8_abcd = TailRecipe( # XX /r register-indirect store with 8-bit offset of FPR. fstDisp8 = TailRecipe( - 'fstDisp8', Store, size=2, ins=(FPR, GPR_DEREF_SAFE), outs=(), + 'fstDisp8', Store, base_size=2, ins=(FPR, GPR), outs=(), instp=IsSignedInt(Store.offset, 8), clobbers_flags=False, + compute_size='size_plus_maybe_sib_for_in_reg_1', emit=''' if !flags.notrap() { sink.trap(TrapCode::HeapOutOfBounds, func.srclocs[inst]); } PUT_OP(bits, rex2(in_reg1, in_reg0), sink); - modrm_disp8(in_reg1, in_reg0, sink); + if needs_sib_byte(in_reg1) { + modrm_sib_disp8(in_reg0, sink); + sib_noindex(in_reg1, sink); + } else { + modrm_disp8(in_reg1, in_reg0, sink); + } let offset: i32 = offset.into(); sink.put1(offset as u8); ''') # XX /r register-indirect store with index and 8-bit offset of FPR. fstWithIndexDisp8 = TailRecipe( - 'fstWithIndexDisp8', StoreComplex, size=3, - ins=(FPR, GPR, GPR_DEREF_SAFE), + 'fstWithIndexDisp8', StoreComplex, base_size=3, + ins=(FPR, GPR, GPR), outs=(), instp=IsSignedInt(StoreComplex.offset, 8), clobbers_flags=False, @@ -991,22 +1038,28 @@ fstWithIndexDisp8 = TailRecipe( # XX /r register-indirect store with 32-bit offset. stDisp32 = TailRecipe( - 'stDisp32', Store, size=5, ins=(GPR, GPR_DEREF_SAFE), outs=(), + 'stDisp32', Store, base_size=5, ins=(GPR, GPR), outs=(), clobbers_flags=False, + compute_size='size_plus_maybe_sib_for_in_reg_1', emit=''' if !flags.notrap() { sink.trap(TrapCode::HeapOutOfBounds, func.srclocs[inst]); } PUT_OP(bits, rex2(in_reg1, in_reg0), sink); - modrm_disp32(in_reg1, in_reg0, sink); + if needs_sib_byte(in_reg1) { + modrm_sib_disp32(in_reg0, sink); + sib_noindex(in_reg1, sink); + } else { + modrm_disp32(in_reg1, in_reg0, sink); + } let offset: i32 = offset.into(); sink.put4(offset as u32); ''') # XX /r register-indirect store with index and 32-bit offset. stWithIndexDisp32 = TailRecipe( - 'stWithIndexDisp32', StoreComplex, size=6, - ins=(GPR, GPR, GPR_DEREF_SAFE), + 'stWithIndexDisp32', StoreComplex, base_size=6, + ins=(GPR, GPR, GPR), outs=(), instp=IsSignedInt(StoreComplex.offset, 32), clobbers_flags=False, @@ -1024,7 +1077,7 @@ stWithIndexDisp32 = TailRecipe( # XX /r register-indirect store with 32-bit offset. # Only ABCD allowed for stored value. This is for byte stores with no REX. stDisp32_abcd = TailRecipe( - 'stDisp32_abcd', Store, size=5, ins=(ABCD, GPR), outs=(), + 'stDisp32_abcd', Store, base_size=5, ins=(ABCD, GPR), outs=(), when_prefixed=stDisp32, clobbers_flags=False, emit=''' @@ -1040,8 +1093,8 @@ stDisp32_abcd = TailRecipe( # XX /r register-indirect store with index and 32-bit offset. # Only ABCD allowed for stored value. This is for byte stores with no REX. stWithIndexDisp32_abcd = TailRecipe( - 'stWithIndexDisp32_abcd', StoreComplex, size=6, - ins=(ABCD, GPR, GPR_DEREF_SAFE), + 'stWithIndexDisp32_abcd', StoreComplex, base_size=6, + ins=(ABCD, GPR, GPR), outs=(), instp=IsSignedInt(StoreComplex.offset, 32), clobbers_flags=False, @@ -1058,22 +1111,28 @@ stWithIndexDisp32_abcd = TailRecipe( # XX /r register-indirect store with 32-bit offset of FPR. fstDisp32 = TailRecipe( - 'fstDisp32', Store, size=5, ins=(FPR, GPR_DEREF_SAFE), outs=(), + 'fstDisp32', Store, base_size=5, ins=(FPR, GPR), outs=(), clobbers_flags=False, + compute_size='size_plus_maybe_sib_for_in_reg_1', emit=''' if !flags.notrap() { sink.trap(TrapCode::HeapOutOfBounds, func.srclocs[inst]); } PUT_OP(bits, rex2(in_reg1, in_reg0), sink); - modrm_disp32(in_reg1, in_reg0, sink); + if needs_sib_byte(in_reg1) { + modrm_sib_disp32(in_reg0, sink); + sib_noindex(in_reg1, sink); + } else { + modrm_disp32(in_reg1, in_reg0, sink); + } let offset: i32 = offset.into(); sink.put4(offset as u32); ''') # XX /r register-indirect store with index and 32-bit offset of FPR. fstWithIndexDisp32 = TailRecipe( - 'fstWithIndexDisp32', StoreComplex, size=6, - ins=(FPR, GPR, GPR_DEREF_SAFE), + 'fstWithIndexDisp32', StoreComplex, base_size=6, + ins=(FPR, GPR, GPR), outs=(), instp=IsSignedInt(StoreComplex.offset, 32), clobbers_flags=False, @@ -1090,7 +1149,7 @@ fstWithIndexDisp32 = TailRecipe( # Unary spill with SIB and 32-bit displacement. spillSib32 = TailRecipe( - 'spillSib32', Unary, size=6, ins=GPR, outs=StackGPR32, + 'spillSib32', Unary, base_size=6, ins=GPR, outs=StackGPR32, clobbers_flags=False, emit=''' sink.trap(TrapCode::StackOverflow, func.srclocs[inst]); @@ -1103,7 +1162,7 @@ spillSib32 = TailRecipe( # Like spillSib32, but targeting an FPR rather than a GPR. fspillSib32 = TailRecipe( - 'fspillSib32', Unary, size=6, ins=FPR, outs=StackFPR32, + 'fspillSib32', Unary, base_size=6, ins=FPR, outs=StackFPR32, clobbers_flags=False, emit=''' sink.trap(TrapCode::StackOverflow, func.srclocs[inst]); @@ -1116,7 +1175,7 @@ fspillSib32 = TailRecipe( # Regspill using RSP-relative addressing. regspill32 = TailRecipe( - 'regspill32', RegSpill, size=6, ins=GPR, outs=(), + 'regspill32', RegSpill, base_size=6, ins=GPR, outs=(), clobbers_flags=False, emit=''' sink.trap(TrapCode::StackOverflow, func.srclocs[inst]); @@ -1130,7 +1189,7 @@ regspill32 = TailRecipe( # Like regspill32, but targeting an FPR rather than a GPR. fregspill32 = TailRecipe( - 'fregspill32', RegSpill, size=6, ins=FPR, outs=(), + 'fregspill32', RegSpill, base_size=6, ins=FPR, outs=(), clobbers_flags=False, emit=''' sink.trap(TrapCode::StackOverflow, func.srclocs[inst]); @@ -1148,81 +1207,113 @@ fregspill32 = TailRecipe( # XX /r load with no offset. ld = TailRecipe( - 'ld', Load, size=1, ins=(GPR_ZERO_DEREF_SAFE), outs=(GPR), + 'ld', Load, base_size=1, ins=(GPR), outs=(GPR), instp=IsEqual(Load.offset, 0), clobbers_flags=False, + compute_size="size_plus_maybe_offset_for_in_reg_0", emit=''' if !flags.notrap() { sink.trap(TrapCode::HeapOutOfBounds, func.srclocs[inst]); } PUT_OP(bits, rex2(in_reg0, out_reg0), sink); - modrm_rm(in_reg0, out_reg0, sink); + if needs_offset(in_reg0) { + modrm_disp8(in_reg0, out_reg0, sink); + sink.put1(0); + } else { + modrm_rm(in_reg0, out_reg0, sink); + } ''') # XX /r load with index and no offset. ldWithIndex = TailRecipe( - 'ldWithIndex', LoadComplex, size=2, - ins=(GPR_ZERO_DEREF_SAFE, GPR_DEREF_SAFE), + 'ldWithIndex', LoadComplex, base_size=2, + ins=(GPR, GPR), outs=(GPR), instp=IsEqual(LoadComplex.offset, 0), clobbers_flags=False, + compute_size="size_plus_maybe_offset_for_in_reg_0", emit=''' if !flags.notrap() { sink.trap(TrapCode::HeapOutOfBounds, func.srclocs[inst]); } PUT_OP(bits, rex3(in_reg0, out_reg0, in_reg1), sink); - modrm_sib(out_reg0, sink); - sib(0, in_reg1, in_reg0, sink); + if needs_offset(in_reg0) { + modrm_sib_disp8(out_reg0, sink); + sib(0, in_reg1, in_reg0, sink); + sink.put1(0); + } else { + modrm_sib(out_reg0, sink); + sib(0, in_reg1, in_reg0, sink); + } ''') # XX /r float load with no offset. fld = TailRecipe( - 'fld', Load, size=1, ins=(GPR_ZERO_DEREF_SAFE), outs=(FPR), + 'fld', Load, base_size=1, ins=(GPR), outs=(FPR), instp=IsEqual(Load.offset, 0), clobbers_flags=False, + compute_size="size_plus_maybe_offset_for_in_reg_0", emit=''' if !flags.notrap() { sink.trap(TrapCode::HeapOutOfBounds, func.srclocs[inst]); } PUT_OP(bits, rex2(in_reg0, out_reg0), sink); - modrm_rm(in_reg0, out_reg0, sink); + if needs_offset(in_reg0) { + modrm_disp8(in_reg0, out_reg0, sink); + sink.put1(0); + } else { + modrm_rm(in_reg0, out_reg0, sink); + } ''') # XX /r float load with index and no offset. fldWithIndex = TailRecipe( - 'fldWithIndex', LoadComplex, size=2, - ins=(GPR_ZERO_DEREF_SAFE, GPR_DEREF_SAFE), + 'fldWithIndex', LoadComplex, base_size=2, + ins=(GPR, GPR), outs=(FPR), instp=IsEqual(LoadComplex.offset, 0), clobbers_flags=False, + compute_size="size_plus_maybe_offset_for_in_reg_0", emit=''' if !flags.notrap() { sink.trap(TrapCode::HeapOutOfBounds, func.srclocs[inst]); } PUT_OP(bits, rex3(in_reg0, out_reg0, in_reg1), sink); - modrm_sib(out_reg0, sink); - sib(0, in_reg1, in_reg0, sink); + if needs_offset(in_reg0) { + modrm_sib_disp8(out_reg0, sink); + sib(0, in_reg1, in_reg0, sink); + sink.put1(0); + } else { + modrm_sib(out_reg0, sink); + sib(0, in_reg1, in_reg0, sink); + } ''') # XX /r load with 8-bit offset. ldDisp8 = TailRecipe( - 'ldDisp8', Load, size=2, ins=(GPR_DEREF_SAFE), outs=(GPR), + 'ldDisp8', Load, base_size=2, ins=(GPR), outs=(GPR), instp=IsSignedInt(Load.offset, 8), clobbers_flags=False, + compute_size="size_plus_maybe_sib_for_in_reg_0", emit=''' if !flags.notrap() { sink.trap(TrapCode::HeapOutOfBounds, func.srclocs[inst]); } PUT_OP(bits, rex2(in_reg0, out_reg0), sink); - modrm_disp8(in_reg0, out_reg0, sink); + if needs_sib_byte(in_reg0) { + modrm_sib_disp8(out_reg0, sink); + sib_noindex(in_reg0, sink); + } else { + modrm_disp8(in_reg0, out_reg0, sink); + } let offset: i32 = offset.into(); sink.put1(offset as u8); ''') # XX /r load with index and 8-bit offset. ldWithIndexDisp8 = TailRecipe( - 'ldWithIndexDisp8', LoadComplex, size=3, - ins=(GPR, GPR_DEREF_SAFE), + 'ldWithIndexDisp8', LoadComplex, base_size=3, + ins=(GPR, GPR), outs=(GPR), instp=IsSignedInt(LoadComplex.offset, 8), clobbers_flags=False, @@ -1239,23 +1330,29 @@ ldWithIndexDisp8 = TailRecipe( # XX /r float load with 8-bit offset. fldDisp8 = TailRecipe( - 'fldDisp8', Load, size=2, ins=(GPR_DEREF_SAFE), outs=(FPR), + 'fldDisp8', Load, base_size=2, ins=(GPR), outs=(FPR), instp=IsSignedInt(Load.offset, 8), clobbers_flags=False, + compute_size="size_plus_maybe_sib_for_in_reg_0", emit=''' if !flags.notrap() { sink.trap(TrapCode::HeapOutOfBounds, func.srclocs[inst]); } PUT_OP(bits, rex2(in_reg0, out_reg0), sink); - modrm_disp8(in_reg0, out_reg0, sink); + if needs_sib_byte(in_reg0) { + modrm_sib_disp8(out_reg0, sink); + sib_noindex(in_reg0, sink); + } else { + modrm_disp8(in_reg0, out_reg0, sink); + } let offset: i32 = offset.into(); sink.put1(offset as u8); ''') # XX /r float load with 8-bit offset. fldWithIndexDisp8 = TailRecipe( - 'fldWithIndexDisp8', LoadComplex, size=3, - ins=(GPR, GPR_DEREF_SAFE), + 'fldWithIndexDisp8', LoadComplex, base_size=3, + ins=(GPR, GPR), outs=(FPR), instp=IsSignedInt(LoadComplex.offset, 8), clobbers_flags=False, @@ -1272,23 +1369,29 @@ fldWithIndexDisp8 = TailRecipe( # XX /r load with 32-bit offset. ldDisp32 = TailRecipe( - 'ldDisp32', Load, size=5, ins=(GPR_DEREF_SAFE), outs=(GPR), + 'ldDisp32', Load, base_size=5, ins=(GPR), outs=(GPR), instp=IsSignedInt(Load.offset, 32), clobbers_flags=False, + compute_size='size_plus_maybe_sib_for_in_reg_0', emit=''' if !flags.notrap() { sink.trap(TrapCode::HeapOutOfBounds, func.srclocs[inst]); } PUT_OP(bits, rex2(in_reg0, out_reg0), sink); - modrm_disp32(in_reg0, out_reg0, sink); + if needs_sib_byte(in_reg0) { + modrm_sib_disp32(out_reg0, sink); + sib_noindex(in_reg0, sink); + } else { + modrm_disp32(in_reg0, out_reg0, sink); + } let offset: i32 = offset.into(); sink.put4(offset as u32); ''') # XX /r load with index and 32-bit offset. ldWithIndexDisp32 = TailRecipe( - 'ldWithIndexDisp32', LoadComplex, size=6, - ins=(GPR, GPR_DEREF_SAFE), + 'ldWithIndexDisp32', LoadComplex, base_size=6, + ins=(GPR, GPR), outs=(GPR), instp=IsSignedInt(LoadComplex.offset, 32), clobbers_flags=False, @@ -1305,23 +1408,29 @@ ldWithIndexDisp32 = TailRecipe( # XX /r float load with 32-bit offset. fldDisp32 = TailRecipe( - 'fldDisp32', Load, size=5, ins=(GPR_DEREF_SAFE), outs=(FPR), + 'fldDisp32', Load, base_size=5, ins=(GPR), outs=(FPR), instp=IsSignedInt(Load.offset, 32), clobbers_flags=False, + compute_size="size_plus_maybe_sib_for_in_reg_0", emit=''' if !flags.notrap() { sink.trap(TrapCode::HeapOutOfBounds, func.srclocs[inst]); } PUT_OP(bits, rex2(in_reg0, out_reg0), sink); - modrm_disp32(in_reg0, out_reg0, sink); + if needs_sib_byte(in_reg0) { + modrm_sib_disp32(out_reg0, sink); + sib_noindex(in_reg0, sink); + } else { + modrm_disp32(in_reg0, out_reg0, sink); + } let offset: i32 = offset.into(); sink.put4(offset as u32); ''') # XX /r float load with index and 32-bit offset. fldWithIndexDisp32 = TailRecipe( - 'fldWithIndexDisp32', LoadComplex, size=6, - ins=(GPR, GPR_DEREF_SAFE), + 'fldWithIndexDisp32', LoadComplex, base_size=6, + ins=(GPR, GPR), outs=(FPR), instp=IsSignedInt(LoadComplex.offset, 32), clobbers_flags=False, @@ -1338,7 +1447,7 @@ fldWithIndexDisp32 = TailRecipe( # Unary fill with SIB and 32-bit displacement. fillSib32 = TailRecipe( - 'fillSib32', Unary, size=6, ins=StackGPR32, outs=GPR, + 'fillSib32', Unary, base_size=6, ins=StackGPR32, outs=GPR, clobbers_flags=False, emit=''' let base = stk_base(in_stk0.base); @@ -1350,7 +1459,7 @@ fillSib32 = TailRecipe( # Like fillSib32, but targeting an FPR rather than a GPR. ffillSib32 = TailRecipe( - 'ffillSib32', Unary, size=6, ins=StackFPR32, outs=FPR, + 'ffillSib32', Unary, base_size=6, ins=StackFPR32, outs=FPR, clobbers_flags=False, emit=''' let base = stk_base(in_stk0.base); @@ -1362,7 +1471,7 @@ ffillSib32 = TailRecipe( # Regfill with RSP-relative 32-bit displacement. regfill32 = TailRecipe( - 'regfill32', RegFill, size=6, ins=StackGPR32, outs=(), + 'regfill32', RegFill, base_size=6, ins=StackGPR32, outs=(), clobbers_flags=False, emit=''' let src = StackRef::sp(src, &func.stack_slots); @@ -1375,7 +1484,7 @@ regfill32 = TailRecipe( # Like regfill32, but targeting an FPR rather than a GPR. fregfill32 = TailRecipe( - 'fregfill32', RegFill, size=6, ins=StackFPR32, outs=(), + 'fregfill32', RegFill, base_size=6, ins=StackFPR32, outs=(), clobbers_flags=False, emit=''' let src = StackRef::sp(src, &func.stack_slots); @@ -1390,7 +1499,7 @@ fregfill32 = TailRecipe( # Call/return # call_id = TailRecipe( - 'call_id', Call, size=4, ins=(), outs=(), + 'call_id', Call, base_size=4, ins=(), outs=(), emit=''' sink.trap(TrapCode::StackOverflow, func.srclocs[inst]); PUT_OP(bits, BASE_REX, sink); @@ -1403,7 +1512,7 @@ call_id = TailRecipe( ''') call_plt_id = TailRecipe( - 'call_plt_id', Call, size=4, ins=(), outs=(), + 'call_plt_id', Call, base_size=4, ins=(), outs=(), emit=''' sink.trap(TrapCode::StackOverflow, func.srclocs[inst]); PUT_OP(bits, BASE_REX, sink); @@ -1414,7 +1523,7 @@ call_plt_id = TailRecipe( ''') call_r = TailRecipe( - 'call_r', CallIndirect, size=1, ins=GPR, outs=(), + 'call_r', CallIndirect, base_size=1, ins=GPR, outs=(), emit=''' sink.trap(TrapCode::StackOverflow, func.srclocs[inst]); PUT_OP(bits, rex1(in_reg0), sink); @@ -1422,7 +1531,7 @@ call_r = TailRecipe( ''') ret = TailRecipe( - 'ret', MultiAry, size=0, ins=(), outs=(), + 'ret', MultiAry, base_size=0, ins=(), outs=(), emit=''' PUT_OP(bits, BASE_REX, sink); ''') @@ -1431,7 +1540,7 @@ ret = TailRecipe( # Branches # jmpb = TailRecipe( - 'jmpb', Jump, size=1, ins=(), outs=(), + 'jmpb', Jump, base_size=1, ins=(), outs=(), branch_range=8, clobbers_flags=False, emit=''' @@ -1440,7 +1549,7 @@ jmpb = TailRecipe( ''') jmpd = TailRecipe( - 'jmpd', Jump, size=4, ins=(), outs=(), + 'jmpd', Jump, base_size=4, ins=(), outs=(), branch_range=32, clobbers_flags=False, emit=''' @@ -1449,7 +1558,7 @@ jmpd = TailRecipe( ''') brib = TailRecipe( - 'brib', BranchInt, size=1, ins=FLAG.rflags, outs=(), + 'brib', BranchInt, base_size=1, ins=FLAG.rflags, outs=(), branch_range=8, clobbers_flags=False, emit=''' @@ -1458,7 +1567,7 @@ brib = TailRecipe( ''') brid = TailRecipe( - 'brid', BranchInt, size=4, ins=FLAG.rflags, outs=(), + 'brid', BranchInt, base_size=4, ins=FLAG.rflags, outs=(), branch_range=32, clobbers_flags=False, emit=''' @@ -1467,7 +1576,7 @@ brid = TailRecipe( ''') brfb = TailRecipe( - 'brfb', BranchFloat, size=1, ins=FLAG.rflags, outs=(), + 'brfb', BranchFloat, base_size=1, ins=FLAG.rflags, outs=(), branch_range=8, clobbers_flags=False, instp=floatccs(BranchFloat), @@ -1477,7 +1586,7 @@ brfb = TailRecipe( ''') brfd = TailRecipe( - 'brfd', BranchFloat, size=4, ins=FLAG.rflags, outs=(), + 'brfd', BranchFloat, base_size=4, ins=FLAG.rflags, outs=(), branch_range=32, clobbers_flags=False, instp=floatccs(BranchFloat), @@ -1487,7 +1596,7 @@ brfd = TailRecipe( ''') indirect_jmp = TailRecipe( - 'indirect_jmp', IndirectJump, size=1, ins=GPR, outs=(), + 'indirect_jmp', IndirectJump, base_size=1, ins=GPR, outs=(), clobbers_flags=False, emit=''' PUT_OP(bits, rex1(in_reg0), sink); @@ -1495,19 +1604,26 @@ indirect_jmp = TailRecipe( ''') jt_entry = TailRecipe( - 'jt_entry', BranchTableEntry, size=2, - ins=(GPR_DEREF_SAFE, GPR_ZERO_DEREF_SAFE), + 'jt_entry', BranchTableEntry, base_size=2, + ins=(GPR, GPR), outs=(GPR), clobbers_flags=False, instp=valid_scale(BranchTableEntry), + compute_size="size_plus_maybe_offset_for_in_reg_1", emit=''' PUT_OP(bits, rex3(in_reg1, out_reg0, in_reg0), sink); - modrm_sib(out_reg0, sink); - sib(imm.trailing_zeros() as u8, in_reg0, in_reg1, sink); + if needs_offset(in_reg1) { + modrm_sib_disp8(out_reg0, sink); + sib(imm.trailing_zeros() as u8, in_reg0, in_reg1, sink); + sink.put1(0); + } else { + modrm_sib(out_reg0, sink); + sib(imm.trailing_zeros() as u8, in_reg0, in_reg1, sink); + } ''') jt_base = TailRecipe( - 'jt_base', BranchTableBase, size=5, ins=(), outs=(GPR), + 'jt_base', BranchTableBase, base_size=5, ins=(), outs=(GPR), clobbers_flags=False, emit=''' PUT_OP(bits, rex2(0, out_reg0), sink); @@ -1529,7 +1645,7 @@ jt_base = TailRecipe( # seti = TailRecipe( - 'seti', IntCond, size=1, ins=FLAG.rflags, outs=GPR, + 'seti', IntCond, base_size=1, ins=FLAG.rflags, outs=GPR, requires_prefix=True, clobbers_flags=False, emit=''' @@ -1537,7 +1653,7 @@ seti = TailRecipe( modrm_r_bits(out_reg0, bits, sink); ''') seti_abcd = TailRecipe( - 'seti_abcd', IntCond, size=1, ins=FLAG.rflags, outs=ABCD, + 'seti_abcd', IntCond, base_size=1, ins=FLAG.rflags, outs=ABCD, when_prefixed=seti, clobbers_flags=False, emit=''' @@ -1546,7 +1662,7 @@ seti_abcd = TailRecipe( ''') setf = TailRecipe( - 'setf', FloatCond, size=1, ins=FLAG.rflags, outs=GPR, + 'setf', FloatCond, base_size=1, ins=FLAG.rflags, outs=GPR, requires_prefix=True, clobbers_flags=False, emit=''' @@ -1554,7 +1670,7 @@ setf = TailRecipe( modrm_r_bits(out_reg0, bits, sink); ''') setf_abcd = TailRecipe( - 'setf_abcd', FloatCond, size=1, ins=FLAG.rflags, outs=ABCD, + 'setf_abcd', FloatCond, base_size=1, ins=FLAG.rflags, outs=ABCD, when_prefixed=setf, clobbers_flags=False, emit=''' @@ -1568,7 +1684,7 @@ setf_abcd = TailRecipe( # 1 byte, modrm(r,r), is after the opcode # cmov = TailRecipe( - 'cmov', IntSelect, size=1, ins=(FLAG.rflags, GPR, GPR), outs=2, + 'cmov', IntSelect, base_size=1, ins=(FLAG.rflags, GPR, GPR), outs=2, requires_prefix=False, clobbers_flags=False, emit=''' @@ -1580,7 +1696,7 @@ cmov = TailRecipe( # Bit scan forwards and reverse # bsf_and_bsr = TailRecipe( - 'bsf_and_bsr', Unary, size=1, ins=GPR, outs=(GPR, FLAG.rflags), + 'bsf_and_bsr', Unary, base_size=1, ins=GPR, outs=(GPR, FLAG.rflags), requires_prefix=False, clobbers_flags=True, emit=''' @@ -1594,7 +1710,7 @@ bsf_and_bsr = TailRecipe( # XX /r, MR form. Compare two GPR registers and set flags. rcmp = TailRecipe( - 'rcmp', Binary, size=1, ins=(GPR, GPR), outs=FLAG.rflags, + 'rcmp', Binary, base_size=1, ins=(GPR, GPR), outs=FLAG.rflags, emit=''' PUT_OP(bits, rex2(in_reg0, in_reg1), sink); modrm_rr(in_reg0, in_reg1, sink); @@ -1602,7 +1718,7 @@ rcmp = TailRecipe( # XX /r, RM form. Compare two FPR registers and set flags. fcmp = TailRecipe( - 'fcmp', Binary, size=1, ins=(FPR, FPR), outs=FLAG.rflags, + 'fcmp', Binary, base_size=1, ins=(FPR, FPR), outs=FLAG.rflags, emit=''' PUT_OP(bits, rex2(in_reg1, in_reg0), sink); modrm_rr(in_reg1, in_reg0, sink); @@ -1610,7 +1726,7 @@ fcmp = TailRecipe( # XX /n, MI form with imm8. rcmp_ib = TailRecipe( - 'rcmp_ib', BinaryImm, size=2, ins=GPR, outs=FLAG.rflags, + 'rcmp_ib', BinaryImm, base_size=2, ins=GPR, outs=FLAG.rflags, instp=IsSignedInt(BinaryImm.imm, 8), emit=''' PUT_OP(bits, rex1(in_reg0), sink); @@ -1621,7 +1737,7 @@ rcmp_ib = TailRecipe( # XX /n, MI form with imm32. rcmp_id = TailRecipe( - 'rcmp_id', BinaryImm, size=5, ins=GPR, outs=FLAG.rflags, + 'rcmp_id', BinaryImm, base_size=5, ins=GPR, outs=FLAG.rflags, instp=IsSignedInt(BinaryImm.imm, 32), emit=''' PUT_OP(bits, rex1(in_reg0), sink); @@ -1632,7 +1748,7 @@ rcmp_id = TailRecipe( # Same as rcmp, but second operand is the stack pointer. rcmp_sp = TailRecipe( - 'rcmp_sp', Unary, size=1, ins=GPR, outs=FLAG.rflags, + 'rcmp_sp', Unary, base_size=1, ins=GPR, outs=FLAG.rflags, emit=''' PUT_OP(bits, rex2(in_reg0, RU::rsp.into()), sink); modrm_rr(in_reg0, RU::rsp.into(), sink); @@ -1652,7 +1768,7 @@ rcmp_sp = TailRecipe( # Bits 0-7 are the Jcc opcode. # Bits 8-15 control the test instruction which always has opcode byte 0x85. tjccb = TailRecipe( - 'tjccb', Branch, size=1 + 2, ins=GPR, outs=(), + 'tjccb', Branch, base_size=1 + 2, ins=GPR, outs=(), branch_range=8, emit=''' // test r, r. @@ -1664,7 +1780,7 @@ tjccb = TailRecipe( ''') tjccd = TailRecipe( - 'tjccd', Branch, size=1 + 6, ins=GPR, outs=(), + 'tjccd', Branch, base_size=1 + 6, ins=GPR, outs=(), branch_range=32, emit=''' // test r, r. @@ -1681,7 +1797,7 @@ tjccd = TailRecipe( # Same as tjccb, but only looks at the low 8 bits of the register, for b1 # types. t8jccb = TailRecipe( - 't8jccb', Branch, size=1 + 2, ins=GPR, outs=(), + 't8jccb', Branch, base_size=1 + 2, ins=GPR, outs=(), branch_range=8, requires_prefix=True, emit=''' @@ -1693,7 +1809,7 @@ t8jccb = TailRecipe( disp1(destination, func, sink); ''') t8jccb_abcd = TailRecipe( - 't8jccb_abcd', Branch, size=1 + 2, ins=ABCD, outs=(), + 't8jccb_abcd', Branch, base_size=1 + 2, ins=ABCD, outs=(), branch_range=8, when_prefixed=t8jccb, emit=''' @@ -1706,7 +1822,7 @@ t8jccb_abcd = TailRecipe( ''') t8jccd = TailRecipe( - 't8jccd', Branch, size=1 + 6, ins=GPR, outs=(), + 't8jccd', Branch, base_size=1 + 6, ins=GPR, outs=(), branch_range=32, requires_prefix=True, emit=''' @@ -1719,7 +1835,7 @@ t8jccd = TailRecipe( disp4(destination, func, sink); ''') t8jccd_abcd = TailRecipe( - 't8jccd_abcd', Branch, size=1 + 6, ins=ABCD, outs=(), + 't8jccd_abcd', Branch, base_size=1 + 6, ins=ABCD, outs=(), branch_range=32, when_prefixed=t8jccd, emit=''' @@ -1738,7 +1854,7 @@ t8jccd_abcd = TailRecipe( # any register, but is is larger because it uses a 32-bit test instruction with # a 0xff immediate. t8jccd_long = TailRecipe( - 't8jccd_long', Branch, size=5 + 6, ins=GPR, outs=(), + 't8jccd_long', Branch, base_size=5 + 6, ins=GPR, outs=(), branch_range=32, emit=''' // test32 r, 0xff. @@ -1769,7 +1885,7 @@ t8jccd_long = TailRecipe( # instruction, so it is limited to the `ABCD` register class for booleans. # The omission of a `when_prefixed` alternative is deliberate here. icscc = TailRecipe( - 'icscc', IntCompare, size=1 + 3, ins=(GPR, GPR), outs=ABCD, + 'icscc', IntCompare, base_size=1 + 3, ins=(GPR, GPR), outs=ABCD, emit=''' // Comparison instruction. PUT_OP(bits, rex2(in_reg0, in_reg1), sink); @@ -1794,7 +1910,7 @@ icscc = TailRecipe( ''') icscc_ib = TailRecipe( - 'icscc_ib', IntCompareImm, size=2 + 3, ins=GPR, outs=ABCD, + 'icscc_ib', IntCompareImm, base_size=2 + 3, ins=GPR, outs=ABCD, instp=IsSignedInt(IntCompareImm.imm, 8), emit=''' // Comparison instruction. @@ -1822,7 +1938,7 @@ icscc_ib = TailRecipe( ''') icscc_id = TailRecipe( - 'icscc_id', IntCompareImm, size=5 + 3, ins=GPR, outs=ABCD, + 'icscc_id', IntCompareImm, base_size=5 + 3, ins=GPR, outs=ABCD, instp=IsSignedInt(IntCompareImm.imm, 32), emit=''' // Comparison instruction. @@ -1864,7 +1980,7 @@ icscc_id = TailRecipe( # Not all floating point condition codes are supported. # The omission of a `when_prefixed` alternative is deliberate here. fcscc = TailRecipe( - 'fcscc', FloatCompare, size=1 + 3, ins=(FPR, FPR), outs=ABCD, + 'fcscc', FloatCompare, base_size=1 + 3, ins=(FPR, FPR), outs=ABCD, instp=floatccs(FloatCompare), emit=''' // Comparison instruction. diff --git a/lib/codegen/meta-python/isa/x86/registers.py b/lib/codegen/meta-python/isa/x86/registers.py index f463b93a46..3cca0bc377 100644 --- a/lib/codegen/meta-python/isa/x86/registers.py +++ b/lib/codegen/meta-python/isa/x86/registers.py @@ -46,14 +46,7 @@ FlagRegs = RegBank( names=['rflags']) GPR = RegClass(IntRegs) -# Certain types of deref encodings cannot be used with all registers. -# R13/RBP cannot be used with zero-offset load or store instructions. -# R12 cannot be used with a non-SIB-byte encoding of all derefs. -GPR_DEREF_SAFE = GPR.without(GPR.rsp, GPR.r12) -GPR_ZERO_DEREF_SAFE = GPR_DEREF_SAFE.without(GPR.rbp, GPR.r13) GPR8 = GPR[0:8] -GPR8_DEREF_SAFE = GPR8.without(GPR.rsp) -GPR8_ZERO_DEREF_SAFE = GPR8_DEREF_SAFE.without(GPR.rbp) ABCD = GPR[0:4] FPR = RegClass(FloatRegs) FPR8 = FPR[0:8] diff --git a/lib/codegen/src/binemit/relaxation.rs b/lib/codegen/src/binemit/relaxation.rs index 3b45a1c7b3..8616dccbbd 100644 --- a/lib/codegen/src/binemit/relaxation.rs +++ b/lib/codegen/src/binemit/relaxation.rs @@ -32,6 +32,7 @@ use cursor::{Cursor, FuncCursor}; use ir::{Function, InstructionData, Opcode}; use isa::{EncInfo, TargetIsa}; use iterators::IteratorExtras; +use regalloc::RegDiversions; use timing; use CodegenResult; @@ -51,6 +52,7 @@ pub fn relax_branches(func: &mut Function, isa: &TargetIsa) -> CodegenResult CodegenResult CodegenResult CodegenResult(&'a self, ebb: Ebb, encinfo: &EncInfo) -> InstOffsetIter<'a> { + pub fn inst_offsets<'a>( + &'a self, + func: &'a Function, + ebb: Ebb, + encinfo: &EncInfo, + ) -> InstOffsetIter<'a> { assert!( !self.offsets.is_empty(), "Code layout must be computed first" ); InstOffsetIter { encinfo: encinfo.clone(), + func, + divert: RegDiversions::new(), encodings: &self.encodings, offset: self.offsets[ebb], iter: self.layout.ebb_insts(ebb), @@ -226,6 +234,8 @@ impl fmt::Debug for Function { /// Iterator returning instruction offsets and sizes: `(offset, inst, size)`. pub struct InstOffsetIter<'a> { encinfo: EncInfo, + divert: RegDiversions, + func: &'a Function, encodings: &'a InstEncodings, offset: CodeOffset, iter: ir::layout::Insts<'a>, @@ -236,10 +246,13 @@ impl<'a> Iterator for InstOffsetIter<'a> { fn next(&mut self) -> Option { self.iter.next().map(|inst| { - let size = self.encinfo.bytes(self.encodings[inst]); + self.divert.apply(&self.func.dfg[inst]); + let byte_size = + self.encinfo + .byte_size(self.encodings[inst], inst, &self.divert, self.func); let offset = self.offset; - self.offset += size; - (offset, inst, size) + self.offset += byte_size; + (offset, inst, byte_size) }) } } diff --git a/lib/codegen/src/ir/stackslot.rs b/lib/codegen/src/ir/stackslot.rs index 41913ac0f0..c574986d46 100644 --- a/lib/codegen/src/ir/stackslot.rs +++ b/lib/codegen/src/ir/stackslot.rs @@ -63,7 +63,7 @@ pub enum StackSlotKind { /// An emergency spill slot. /// /// Emergency slots are allocated late when the register's constraint solver needs extra space - /// to shuffle registers around. The are only used briefly, and can be reused. + /// to shuffle registers around. They are only used briefly, and can be reused. EmergencySlot, } diff --git a/lib/codegen/src/isa/encoding.rs b/lib/codegen/src/isa/encoding.rs index 6bb7e30aec..589069b311 100644 --- a/lib/codegen/src/isa/encoding.rs +++ b/lib/codegen/src/isa/encoding.rs @@ -1,7 +1,9 @@ //! The `Encoding` struct. use binemit::CodeOffset; +use ir::{Function, Inst}; use isa::constraints::{BranchRange, RecipeConstraints}; +use regalloc::RegDiversions; use std::fmt; /// Bits needed to encode an instruction as binary machine code. @@ -78,12 +80,24 @@ impl fmt::Display for DisplayEncoding { } } +type SizeCalculatorFn = fn(&RecipeSizing, Inst, &RegDiversions, &Function) -> u8; + +/// Returns the base size of the Recipe, assuming it's fixed. This is the default for most +/// encodings; others can be variable and longer than this base size, depending on the registers +/// they're using and use a different function, specific per platform. +pub fn base_size(sizing: &RecipeSizing, _: Inst, _2: &RegDiversions, _3: &Function) -> u8 { + sizing.base_size +} + /// Code size information for an encoding recipe. /// /// All encoding recipes correspond to an exact instruction size. pub struct RecipeSizing { /// Size in bytes of instructions encoded with this recipe. - pub bytes: u8, + pub base_size: u8, + + /// Method computing the real instruction's size, given inputs and outputs. + pub compute_size: SizeCalculatorFn, /// Allowed branch range in this recipe, if any. /// @@ -118,13 +132,20 @@ impl EncInfo { } } - /// Get the exact size in bytes of instructions encoded with `enc`. + /// Get the precise size in bytes of instructions encoded with `enc`. /// /// Returns 0 for illegal encodings. - pub fn bytes(&self, enc: Encoding) -> CodeOffset { - self.sizing - .get(enc.recipe()) - .map_or(0, |s| CodeOffset::from(s.bytes)) + pub fn byte_size( + &self, + enc: Encoding, + inst: Inst, + divert: &RegDiversions, + func: &Function, + ) -> CodeOffset { + self.sizing.get(enc.recipe()).map_or(0, |s| { + let compute_size = s.compute_size; + CodeOffset::from(compute_size(&s, inst, divert, func)) + }) } /// Get the branch range that is supported by `enc`, if any. diff --git a/lib/codegen/src/isa/mod.rs b/lib/codegen/src/isa/mod.rs index c3766346b7..678395f43c 100644 --- a/lib/codegen/src/isa/mod.rs +++ b/lib/codegen/src/isa/mod.rs @@ -47,7 +47,7 @@ //! concurrent function compilations. pub use isa::constraints::{BranchRange, ConstraintKind, OperandConstraint, RecipeConstraints}; -pub use isa::encoding::{EncInfo, Encoding}; +pub use isa::encoding::{base_size, EncInfo, Encoding}; pub use isa::registers::{regs_overlap, RegClass, RegClassIndex, RegInfo, RegUnit}; pub use isa::stack::{StackBase, StackBaseMask, StackRef}; @@ -204,7 +204,7 @@ pub trait TargetIsa: fmt::Display { /// Get a data structure describing the registers in this ISA. fn register_info(&self) -> RegInfo; - /// Returns an iterartor over legal encodings for the instruction. + /// Returns an iterator over legal encodings for the instruction. fn legal_encodings<'a>( &'a self, func: &'a ir::Function, diff --git a/lib/codegen/src/isa/riscv/enc_tables.rs b/lib/codegen/src/isa/riscv/enc_tables.rs index 5f7c084a67..bbb7492805 100644 --- a/lib/codegen/src/isa/riscv/enc_tables.rs +++ b/lib/codegen/src/isa/riscv/enc_tables.rs @@ -5,7 +5,7 @@ use ir; use isa; use isa::constraints::*; use isa::enc_tables::*; -use isa::encoding::RecipeSizing; +use isa::encoding::{base_size, RecipeSizing}; // Include the generated encoding tables: // - `LEVEL1_RV32` diff --git a/lib/codegen/src/isa/x86/binemit.rs b/lib/codegen/src/isa/x86/binemit.rs index 2389595b2e..c930af39fd 100644 --- a/lib/codegen/src/isa/x86/binemit.rs +++ b/lib/codegen/src/isa/x86/binemit.rs @@ -1,5 +1,6 @@ //! Emitting binary x86 machine code. +use super::enc_tables::{needs_offset, needs_sib_byte}; use super::registers::RU; use binemit::{bad_encoding, CodeSink, Reloc}; use ir::condcodes::{CondCode, FloatCC, IntCC}; diff --git a/lib/codegen/src/isa/x86/enc_tables.rs b/lib/codegen/src/isa/x86/enc_tables.rs index 65b7d7c38e..9c52563a27 100644 --- a/lib/codegen/src/isa/x86/enc_tables.rs +++ b/lib/codegen/src/isa/x86/enc_tables.rs @@ -5,15 +5,73 @@ use bitset::BitSet; use cursor::{Cursor, FuncCursor}; use flowgraph::ControlFlowGraph; use ir::condcodes::IntCC; -use ir::{self, InstBuilder}; +use ir::{self, Function, Inst, InstBuilder}; use isa; use isa::constraints::*; use isa::enc_tables::*; +use isa::encoding::base_size; use isa::encoding::RecipeSizing; +use isa::RegUnit; +use regalloc::RegDiversions; include!(concat!(env!("OUT_DIR"), "/encoding-x86.rs")); include!(concat!(env!("OUT_DIR"), "/legalize-x86.rs")); +pub fn needs_sib_byte(reg: RegUnit) -> bool { + reg == RU::r12 as RegUnit || reg == RU::rsp as RegUnit +} +pub fn needs_offset(reg: RegUnit) -> bool { + reg == RU::r13 as RegUnit || reg == RU::rbp as RegUnit +} + +fn additional_size_if( + op_index: usize, + inst: Inst, + divert: &RegDiversions, + func: &Function, + condition_func: fn(RegUnit) -> bool, +) -> u8 { + let addr_reg = divert.reg(func.dfg.inst_args(inst)[op_index], &func.locations); + if condition_func(addr_reg) { + 1 + } else { + 0 + } +} + +fn size_plus_maybe_offset_for_in_reg_0( + sizing: &RecipeSizing, + inst: Inst, + divert: &RegDiversions, + func: &Function, +) -> u8 { + sizing.base_size + additional_size_if(0, inst, divert, func, needs_offset) +} +fn size_plus_maybe_offset_for_in_reg_1( + sizing: &RecipeSizing, + inst: Inst, + divert: &RegDiversions, + func: &Function, +) -> u8 { + sizing.base_size + additional_size_if(1, inst, divert, func, needs_offset) +} +fn size_plus_maybe_sib_for_in_reg_0( + sizing: &RecipeSizing, + inst: Inst, + divert: &RegDiversions, + func: &Function, +) -> u8 { + sizing.base_size + additional_size_if(0, inst, divert, func, needs_sib_byte) +} +fn size_plus_maybe_sib_for_in_reg_1( + sizing: &RecipeSizing, + inst: Inst, + divert: &RegDiversions, + func: &Function, +) -> u8 { + sizing.base_size + additional_size_if(1, inst, divert, func, needs_sib_byte) +} + /// Expand the `sdiv` and `srem` instructions using `x86_sdivmodx`. fn expand_sdivrem( inst: ir::Inst, diff --git a/lib/filetests/src/test_binemit.rs b/lib/filetests/src/test_binemit.rs index d8dfcb617d..da0f0eec09 100644 --- a/lib/filetests/src/test_binemit.rs +++ b/lib/filetests/src/test_binemit.rs @@ -149,7 +149,8 @@ impl SubTest for TestBinEmit { if opt_level == OptLevel::Best { // Get the smallest legal encoding - legal_encodings.min_by_key(|&e| encinfo.bytes(e)) + legal_encodings + .min_by_key(|&e| encinfo.byte_size(e, inst, &divert, &func)) } else { // If not optimizing, just use the first encoding. legal_encodings.next() @@ -204,7 +205,7 @@ impl SubTest for TestBinEmit { "Inconsistent {} header offset", ebb ); - for (offset, inst, enc_bytes) in func.inst_offsets(ebb, &encinfo) { + for (offset, inst, enc_bytes) in func.inst_offsets(&func, ebb, &encinfo) { assert_eq!(sink.offset, offset); sink.text.clear(); let enc = func.encodings[inst];