diff --git a/cranelift/filetests/isa/intel/binary32-float.cton b/cranelift/filetests/isa/intel/binary32-float.cton index a880ed9a8a..6ac14039de 100644 --- a/cranelift/filetests/isa/intel/binary32-float.cton +++ b/cranelift/filetests/isa/intel/binary32-float.cton @@ -185,6 +185,11 @@ ebb0: ; asm: movd 1032(%esp), %xmm2 [-,%xmm2] v211 = fill v201 ; bin: 66 0f 6e 94 24 00000408 + ; asm: movd %xmm5, 1032(%rsp) + regspill v100, %xmm5 -> ss1 ; bin: 66 0f 7e ac 24 00000408 + ; asm: movd 1032(%rsp), %xmm5 + regfill v100, ss1 -> %xmm5 ; bin: 66 0f 6e ac 24 00000408 + ; Comparisons. ; ; Only `supported_floatccs` are tested here. Others are handled by @@ -388,6 +393,11 @@ ebb0: ; asm: movq 1032(%esp), %xmm2 [-,%xmm2] v211 = fill v201 ; bin: f3 0f 7e 94 24 00000408 + ; asm: movq %xmm5, 1032(%rsp) + regspill v100, %xmm5 -> ss1 ; bin: 66 0f d6 ac 24 00000408 + ; asm: movq 1032(%rsp), %xmm5 + regfill v100, ss1 -> %xmm5 ; bin: f3 0f 7e ac 24 00000408 + ; Comparisons. ; ; Only `supported_floatccs` are tested here. Others are handled by diff --git a/cranelift/filetests/isa/intel/binary32.cton b/cranelift/filetests/isa/intel/binary32.cton index ee8c79e24f..e777cfb359 100644 --- a/cranelift/filetests/isa/intel/binary32.cton +++ b/cranelift/filetests/isa/intel/binary32.cton @@ -363,6 +363,11 @@ ebb0: ; asm: movl 1032(%esp), %esi [-,%rsi] v511 = fill v501 ; bin: 8b b4 24 00000408 + ; asm: movl %ecx, 1032(%esp) + regspill v1, %rcx -> ss1 ; bin: 89 8c 24 00000408 + ; asm: movl 1032(%esp), %ecx + regfill v1, ss1 -> %rcx ; bin: 8b 8c 24 00000408 + ; asm: testl %ecx, %ecx ; asm: je ebb1 brz v1, ebb1 ; bin: 85 c9 74 0e diff --git a/cranelift/filetests/isa/intel/binary64-float.cton b/cranelift/filetests/isa/intel/binary64-float.cton index 18a24f9e7e..4ef385bbb2 100644 --- a/cranelift/filetests/isa/intel/binary64-float.cton +++ b/cranelift/filetests/isa/intel/binary64-float.cton @@ -194,6 +194,11 @@ ebb0: ; asm: movd 1032(%rsp), %xmm10 [-,%xmm10] v211 = fill v201 ; bin: 66 44 0f 6e 94 24 00000408 + ; asm: movd %xmm5, 1032(%rsp) + regspill v100, %xmm5 -> ss1 ; bin: 66 0f 7e ac 24 00000408 + ; asm: movd 1032(%rsp), %xmm5 + regfill v100, ss1 -> %xmm5 ; bin: 66 0f 6e ac 24 00000408 + ; Comparisons. ; ; Only `supported_floatccs` are tested here. Others are handled by @@ -412,6 +417,11 @@ ebb0: ; asm: movq 1032(%rsp), %xmm10 [-,%xmm10] v211 = fill v201 ; bin: f3 44 0f 7e 94 24 00000408 + ; asm: movq %xmm5, 1032(%rsp) + regspill v100, %xmm5 -> ss1 ; bin: 66 0f d6 ac 24 00000408 + ; asm: movq 1032(%rsp), %xmm5 + regfill v100, ss1 -> %xmm5 ; bin: f3 0f 7e ac 24 00000408 + ; Comparisons. ; ; Only `supported_floatccs` are tested here. Others are handled by diff --git a/cranelift/filetests/isa/intel/binary64.cton b/cranelift/filetests/isa/intel/binary64.cton index c9febfdab0..03cb54e467 100644 --- a/cranelift/filetests/isa/intel/binary64.cton +++ b/cranelift/filetests/isa/intel/binary64.cton @@ -459,6 +459,11 @@ ebb0: ; asm: movq 1032(%rsp), %r10 [-,%r10] v512 = fill v502 ; bin: 4c 8b 94 24 00000408 + ; asm: movq %rcx, 1032(%rsp) + regspill v1, %rcx -> ss1 ; bin: 48 89 8c 24 00000408 + ; asm: movq 1032(%rsp), %rcx + regfill v1, ss1 -> %rcx ; bin: 48 8b 8c 24 00000408 + ; asm: testq %rcx, %rcx ; asm: je ebb1 brz v1, ebb1 ; bin: 48 85 c9 74 1b @@ -850,6 +855,11 @@ ebb0: ; asm: movl 1032(%rsp), %r10d [-,%r10] v512 = fill v502 ; bin: 44 8b 94 24 00000408 + ; asm: movl %ecx, 1032(%rsp) + regspill v1, %rcx -> ss1 ; bin: 89 8c 24 00000408 + ; asm: movl 1032(%rsp), %ecx + regfill v1, ss1 -> %rcx ; bin: 8b 8c 24 00000408 + ; asm: testl %ecx, %ecx ; asm: je ebb1x brz v1, ebb1 ; bin: 85 c9 74 18 diff --git a/lib/cretonne/meta/gen_binemit.py b/lib/cretonne/meta/gen_binemit.py index 9e2bac451f..8c591f0513 100644 --- a/lib/cretonne/meta/gen_binemit.py +++ b/lib/cretonne/meta/gen_binemit.py @@ -8,7 +8,7 @@ import srcgen try: from typing import Sequence, List # noqa - from cdsl.isa import TargetISA, EncRecipe # noqa + from cdsl.isa import TargetISA, EncRecipe, OperandConstraint # noqa except ImportError: pass @@ -49,36 +49,17 @@ def gen_recipe(recipe, fmt): fmt.line('..') fmt.outdented_line('} = func.dfg[inst] {') + # Pass recipe arguments in this order: inputs, imm_fields, outputs. + args = '' + # Normalize to an `args` array. if want_args and not is_regmove: if iform.has_value_list: fmt.line('let args = args.as_slice(&func.dfg.value_lists);') elif nvops == 1: fmt.line('let args = [arg];') + args += unwrap_values(recipe.ins, 'in', 'args', fmt) - # Unwrap interesting input arguments. - # Don't bother with fixed registers. - args = '' - for i, arg in enumerate(recipe.ins): - if isinstance(arg, RegClass) and not is_regmove: - v = 'in_reg{}'.format(i) - args += ', ' + v - fmt.line( - 'let {} = divert.reg(args[{}], &func.locations);' - .format(v, i)) - elif isinstance(arg, Stack): - v = 'in_stk{}'.format(i) - args += ', ' + v - with fmt.indented( - 'let {} = StackRef::masked('.format(v), - ').unwrap();'): - fmt.format( - 'func.locations[args[{}]].unwrap_stack(),', - i) - fmt.format('{},', arg.stack_base_mask()) - fmt.line('&func.stack_slots,') - - # Pass arguments in this order: inputs, imm_fields, outputs. for f in iform.imm_fields: args += ', ' + f.member @@ -88,24 +69,7 @@ def gen_recipe(recipe, fmt): fmt.line('let results = [func.dfg.first_result(inst)];') else: fmt.line('let results = func.dfg.inst_results(inst);') - for i, res in enumerate(recipe.outs): - if isinstance(res, RegClass): - v = 'out_reg{}'.format(i) - args += ', ' + v - fmt.format( - 'let {} = func.locations[results[{}]].unwrap_reg();', - v, i) - elif isinstance(res, Stack): - v = 'out_stk{}'.format(i) - args += ', ' + v - with fmt.indented( - 'let {} = StackRef::masked('.format(v), - ').unwrap();'): - fmt.format( - 'func.locations[results[{}]].unwrap_stack(),', - i) - fmt.format('{},', res.stack_base_mask()) - fmt.line('&func.stack_slots,') + args += unwrap_values(recipe.outs, 'out', 'results', fmt) # Special handling for regmove instructions. Update the register # diversion tracker. @@ -128,6 +92,36 @@ def gen_recipe(recipe, fmt): fmt.line('return;') +def unwrap_values(args, prefix, values, fmt): + # type: (Sequence[OperandConstraint], str, str, srcgen.Formatter) -> str # noqa + """ + Emit code that unwraps values living in registers or stack slots. + + :param args: Input or output constraints. + :param prefix: Prefix to be used for the generated local variables. + :param values: Name of slice containing the values to be unwrapped. + :returns: Comma separated list of the generated variables + """ + varlist = '' + for i, cst in enumerate(args): + if isinstance(cst, RegClass): + v = '{}_reg{}'.format(prefix, i) + varlist += ', ' + v + fmt.format( + 'let {} = divert.reg({}[{}], &func.locations);', + v, values, i) + elif isinstance(cst, Stack): + v = '{}_stk{}'.format(prefix, i) + varlist += ', ' + v + with fmt.indented( + 'let {} = StackRef::masked('.format(v), + ').unwrap();'): + fmt.format('divert.stack({}[{}], &func.locations),', values, i) + fmt.format('{},', cst.stack_base_mask()) + fmt.line('&func.stack_slots,') + return varlist + + def gen_isa(isa, fmt): # type: (TargetISA, srcgen.Formatter) -> None """ diff --git a/lib/cretonne/meta/isa/intel/encodings.py b/lib/cretonne/meta/isa/intel/encodings.py index db5c9db4ce..6d79ca726e 100644 --- a/lib/cretonne/meta/isa/intel/encodings.py +++ b/lib/cretonne/meta/isa/intel/encodings.py @@ -193,6 +193,7 @@ enc_i32_i64_ld_st(base.store, True, r.stDisp8, 0x89) enc_i32_i64_ld_st(base.store, True, r.stDisp32, 0x89) enc_i32_i64(base.spill, r.spSib32, 0x89) +enc_i32_i64(base.regspill, r.rsp32, 0x89) enc_i64(base.istore32.i64.any, r.st, 0x89) enc_i64(base.istore32.i64.any, r.stDisp8, 0x89) @@ -222,6 +223,7 @@ enc_i32_i64_ld_st(base.load, True, r.ldDisp8, 0x8b) enc_i32_i64_ld_st(base.load, True, r.ldDisp32, 0x8b) enc_i32_i64(base.fill, r.fiSib32, 0x8b) +enc_i32_i64(base.regfill, r.rfi32, 0x8b) enc_i64(base.uload32.i64, r.ld, 0x8b) enc_i64(base.uload32.i64, r.ldDisp8, 0x8b) @@ -268,10 +270,14 @@ enc_both(base.store.f64.any, r.fstDisp8, 0x66, 0x0f, 0xd6) enc_both(base.store.f64.any, r.fstDisp32, 0x66, 0x0f, 0xd6) enc_both(base.fill.f32, r.ffiSib32, 0x66, 0x0f, 0x6e) +enc_both(base.regfill.f32, r.frfi32, 0x66, 0x0f, 0x6e) enc_both(base.fill.f64, r.ffiSib32, 0xf3, 0x0f, 0x7e) +enc_both(base.regfill.f64, r.frfi32, 0xf3, 0x0f, 0x7e) enc_both(base.spill.f32, r.fspSib32, 0x66, 0x0f, 0x7e) +enc_both(base.regspill.f32, r.frsp32, 0x66, 0x0f, 0x7e) enc_both(base.spill.f64, r.fspSib32, 0x66, 0x0f, 0xd6) +enc_both(base.regspill.f64, r.frsp32, 0x66, 0x0f, 0xd6) # # Function addresses. diff --git a/lib/cretonne/meta/isa/intel/recipes.py b/lib/cretonne/meta/isa/intel/recipes.py index fb5104381f..9c4713500a 100644 --- a/lib/cretonne/meta/isa/intel/recipes.py +++ b/lib/cretonne/meta/isa/intel/recipes.py @@ -8,7 +8,8 @@ from cdsl.registers import RegClass from base.formats import Unary, UnaryImm, Binary, BinaryImm, MultiAry from base.formats import Trap, Call, IndirectCall, Store, Load from base.formats import IntCompare, FloatCompare -from base.formats import RegMove, Ternary, Jump, Branch, FuncAddr +from base.formats import Ternary, Jump, Branch, FuncAddr +from base.formats import RegMove, RegSpill, RegFill from .registers import GPR, ABCD, FPR, GPR8, FPR8, StackGPR32, StackFPR32 from .defs import supported_floatccs @@ -570,6 +571,28 @@ fspSib32 = TailRecipe( sink.put4(out_stk0.offset as u32); ''') +# Regspill using RSP-relative addressing. +rsp32 = TailRecipe( + 'rsp32', RegSpill, size=6, ins=GPR, outs=(), + emit=''' + let dst = StackRef::sp(dst, &func.stack_slots); + let base = stk_base(dst.base); + PUT_OP(bits, rex2(base, src), sink); + modrm_sib_disp32(src, sink); + sib_noindex(base, sink); + sink.put4(dst.offset as u32); + ''') +frsp32 = TailRecipe( + 'frsp32', RegSpill, size=6, ins=FPR, outs=(), + emit=''' + let dst = StackRef::sp(dst, &func.stack_slots); + let base = stk_base(dst.base); + PUT_OP(bits, rex2(base, src), sink); + modrm_sib_disp32(src, sink); + sib_noindex(base, sink); + sink.put4(dst.offset as u32); + ''') + # # Load recipes # @@ -656,6 +679,28 @@ ffiSib32 = TailRecipe( sink.put4(in_stk0.offset as u32); ''') +# Regfill with RSP-relative 32-bit displacement. +rfi32 = TailRecipe( + 'rfi32', RegFill, size=6, ins=StackGPR32, outs=(), + emit=''' + let src = StackRef::sp(src, &func.stack_slots); + let base = stk_base(src.base); + PUT_OP(bits, rex2(base, dst), sink); + modrm_sib_disp32(dst, sink); + sib_noindex(base, sink); + sink.put4(src.offset as u32); + ''') +frfi32 = TailRecipe( + 'frfi32', RegFill, size=6, ins=StackFPR32, outs=(), + emit=''' + let src = StackRef::sp(src, &func.stack_slots); + let base = stk_base(src.base); + PUT_OP(bits, rex2(base, dst), sink); + modrm_sib_disp32(dst, sink); + sib_noindex(base, sink); + sink.put4(src.offset as u32); + ''') + # # Call/return # diff --git a/lib/cretonne/src/isa/stack.rs b/lib/cretonne/src/isa/stack.rs index cfb054dd66..ea7ee37248 100644 --- a/lib/cretonne/src/isa/stack.rs +++ b/lib/cretonne/src/isa/stack.rs @@ -24,25 +24,28 @@ pub struct StackRef { impl StackRef { /// Get a reference to the stack slot `ss` using one of the base pointers in `mask`. pub fn masked(ss: StackSlot, mask: StackBaseMask, frame: &StackSlots) -> Option { - let size = frame.frame_size.expect( - "Stack layout must be computed before referencing stack slots", - ); - // Offsets relative to the caller's stack pointer. - let offset = frame[ss].offset; - // Try an SP-relative reference. if mask.contains(StackBase::SP) { - // Offset where SP is pointing. (All ISAs have stacks growing downwards.) - let sp_offset = -(size as StackOffset); - return Some(StackRef { - base: StackBase::SP, - offset: offset - sp_offset, - }); + return Some(StackRef::sp(ss, frame)); } // No reference possible with this mask. None } + + /// Get a reference to `ss` using the stack pointer as a base. + pub fn sp(ss: StackSlot, frame: &StackSlots) -> StackRef { + let size = frame.frame_size.expect( + "Stack layout must be computed before referencing stack slots", + ); + + // Offset where SP is pointing. (All ISAs have stacks growing downwards.) + let sp_offset = -(size as StackOffset); + return StackRef { + base: StackBase::SP, + offset: frame[ss].offset - sp_offset, + }; + } } /// Generic base register for referencing stack slots.