From 29dfcf5dfbf468e3ef3415e840b2666388c90324 Mon Sep 17 00:00:00 2001 From: Jakob Stoklund Olesen Date: Fri, 22 Sep 2017 15:35:11 -0700 Subject: [PATCH] Add spill/fill encodings for Intel ISAs. To begin with, these are catch-all encodings with a SIB byte and a 32-bit displacement, so they can access any stack slot via both the stack pointer and the frame pointer. In the future, we will add encodings for 8-bit displacements as well as EBP-relative references without a SIB byte. --- .../filetests/isa/intel/binary32-float.cton | 34 ++++++++++++++ cranelift/filetests/isa/intel/binary32.cton | 17 +++++++ .../filetests/isa/intel/binary64-float.cton | 37 +++++++++++++++- cranelift/filetests/isa/intel/binary64.cton | 44 +++++++++++++++++++ lib/cretonne/meta/isa/intel/encodings.py | 12 ++++- lib/cretonne/meta/isa/intel/recipes.py | 42 +++++++++++++++++- lib/cretonne/meta/isa/intel/registers.py | 8 +++- lib/cretonne/src/isa/intel/binemit.rs | 29 +++++++++++- 8 files changed, 217 insertions(+), 6 deletions(-) diff --git a/cranelift/filetests/isa/intel/binary32-float.cton b/cranelift/filetests/isa/intel/binary32-float.cton index 3756fbff1e..c5a1b08a0e 100644 --- a/cranelift/filetests/isa/intel/binary32-float.cton +++ b/cranelift/filetests/isa/intel/binary32-float.cton @@ -8,6 +8,11 @@ isa intel has_sse2 ; function %F32() { + ss0 = incoming_arg 8, offset 0 + ss1 = incoming_arg 1024, offset -1024 + ss2 = incoming_arg 1024, offset -2048 + ss3 = incoming_arg 8, offset -2056 + ebb0: [-,%rcx] v0 = iconst.i32 1 [-,%rsi] v1 = iconst.i32 2 @@ -105,10 +110,27 @@ ebb0: ; asm: movd %xmm2, -10000(%esi) [-] store.f32 v101, v1-10000 ; bin: 66 0f 7e 96 ffffd8f0 + ; Spill / Fill. 
+ + ; asm: movd %xmm5, 1032(%esp) + [-,ss1] v200 = spill v100 ; bin: 66 0f 7e ac 24 00000408 + ; asm: movd %xmm2, 1032(%esp) + [-,ss1] v201 = spill v101 ; bin: 66 0f 7e 94 24 00000408 + + ; asm: movd 1032(%esp), %xmm5 + [-,%xmm5] v210 = fill v200 ; bin: 66 0f 6e ac 24 00000408 + ; asm: movd 1032(%esp), %xmm2 + [-,%xmm2] v211 = fill v201 ; bin: 66 0f 6e 94 24 00000408 + return } function %F64() { + ss0 = incoming_arg 8, offset 0 + ss1 = incoming_arg 1024, offset -1024 + ss2 = incoming_arg 1024, offset -2048 + ss3 = incoming_arg 8, offset -2056 + ebb0: [-,%rcx] v0 = iconst.i32 1 [-,%rsi] v1 = iconst.i32 2 @@ -198,5 +220,17 @@ ebb0: ; asm: movq %xmm2, -10000(%esi) [-] store.f64 v101, v1-10000 ; bin: 66 0f d6 96 ffffd8f0 + ; Spill / Fill. + + ; asm: movq %xmm5, 1032(%esp) + [-,ss1] v200 = spill v100 ; bin: 66 0f d6 ac 24 00000408 + ; asm: movq %xmm2, 1032(%esp) + [-,ss1] v201 = spill v101 ; bin: 66 0f d6 94 24 00000408 + + ; asm: movq 1032(%esp), %xmm5 + [-,%xmm5] v210 = fill v200 ; bin: f3 0f 7e ac 24 00000408 + ; asm: movq 1032(%esp), %xmm2 + [-,%xmm2] v211 = fill v201 ; bin: f3 0f 7e 94 24 00000408 + return } diff --git a/cranelift/filetests/isa/intel/binary32.cton b/cranelift/filetests/isa/intel/binary32.cton index 16f8131cef..ee8c79e24f 100644 --- a/cranelift/filetests/isa/intel/binary32.cton +++ b/cranelift/filetests/isa/intel/binary32.cton @@ -11,6 +11,11 @@ function %I32() { fn0 = function %foo() sig0 = () + ss0 = incoming_arg 8, offset 0 + ss1 = incoming_arg 1024, offset -1024 + ss2 = incoming_arg 1024, offset -2048 + ss3 = incoming_arg 8, offset -2056 + ebb0: ; asm: movl $1, %ecx [-,%rcx] v1 = iconst.i32 1 ; bin: b9 00000001 @@ -346,6 +351,18 @@ ebb0: ; asm: call *%esi call_indirect sig0, v401() ; bin: ff d6 + ; Spill / Fill. 
+ + ; asm: movl %ecx, 1032(%esp) + [-,ss1] v500 = spill v1 ; bin: 89 8c 24 00000408 + ; asm: movl %esi, 1032(%esp) + [-,ss1] v501 = spill v2 ; bin: 89 b4 24 00000408 + + ; asm: movl 1032(%esp), %ecx + [-,%rcx] v510 = fill v500 ; bin: 8b 8c 24 00000408 + ; asm: movl 1032(%esp), %esi + [-,%rsi] v511 = fill v501 ; bin: 8b b4 24 00000408 + ; asm: testl %ecx, %ecx ; asm: je ebb1 brz v1, ebb1 ; bin: 85 c9 74 0e diff --git a/cranelift/filetests/isa/intel/binary64-float.cton b/cranelift/filetests/isa/intel/binary64-float.cton index 83aaf6d753..a5b4aaa163 100644 --- a/cranelift/filetests/isa/intel/binary64-float.cton +++ b/cranelift/filetests/isa/intel/binary64-float.cton @@ -1,6 +1,7 @@ ; Binary emission of 64-bit floating point code. test binemit set is_64bit +set is_compressed isa intel has_sse2 ; The binary encodings can be verified with the command: @@ -9,6 +10,11 @@ isa intel has_sse2 ; function %F32() { + ss0 = incoming_arg 8, offset 0 + ss1 = incoming_arg 1024, offset -1024 + ss2 = incoming_arg 1024, offset -2048 + ss3 = incoming_arg 8, offset -2056 + ebb0: [-,%r11] v0 = iconst.i32 1 [-,%rsi] v1 = iconst.i32 2 @@ -36,7 +42,7 @@ ebb0: [-,%xmm10] v17 = bitcast.f32 v1 ; bin: 66 44 0f 6e d6 ; asm: movd %xmm5, %ecx - [-,%rcx] v18 = bitcast.i32 v10 ; bin: 66 40 0f 7e e9 + [-,%rcx] v18 = bitcast.i32 v10 ; bin: 66 0f 7e e9 ; asm: movd %xmm10, %esi [-,%rsi] v19 = bitcast.i32 v11 ; bin: 66 44 0f 7e d6 @@ -113,10 +119,27 @@ ebb0: ; asm: movd %xmm10, -10000(%rax) [-] store.f32 v101, v2-10000 ; bin: 66 44 0f 7e 90 ffffd8f0 + ; Spill / Fill. 
+ + ; asm: movd %xmm5, 1032(%rsp) + [-,ss1] v200 = spill v100 ; bin: 66 0f 7e ac 24 00000408 + ; asm: movd %xmm10, 1032(%rsp) + [-,ss1] v201 = spill v101 ; bin: 66 44 0f 7e 94 24 00000408 + + ; asm: movd 1032(%rsp), %xmm5 + [-,%xmm5] v210 = fill v200 ; bin: 66 0f 6e ac 24 00000408 + ; asm: movd 1032(%rsp), %xmm10 + [-,%xmm10] v211 = fill v201 ; bin: 66 44 0f 6e 94 24 00000408 + return } function %F64() { + ss0 = incoming_arg 8, offset 0 + ss1 = incoming_arg 1024, offset -1024 + ss2 = incoming_arg 1024, offset -2048 + ss3 = incoming_arg 8, offset -2056 + ebb0: [-,%r11] v0 = iconst.i32 1 [-,%rsi] v1 = iconst.i32 2 @@ -221,5 +244,17 @@ ebb0: ; asm: movq %xmm10, -10000(%rax) [-] store.f64 v101, v2-10000 ; bin: 66 44 0f d6 90 ffffd8f0 + ; Spill / Fill. + + ; asm: movq %xmm5, 1032(%rsp) + [-,ss1] v200 = spill v100 ; bin: 66 0f d6 ac 24 00000408 + ; asm: movq %xmm10, 1032(%rsp) + [-,ss1] v201 = spill v101 ; bin: 66 44 0f d6 94 24 00000408 + + ; asm: movq 1032(%rsp), %xmm5 + [-,%xmm5] v210 = fill v200 ; bin: f3 0f 7e ac 24 00000408 + ; asm: movq 1032(%rsp), %xmm10 + [-,%xmm10] v211 = fill v201 ; bin: f3 44 0f 7e 94 24 00000408 + return } diff --git a/cranelift/filetests/isa/intel/binary64.cton b/cranelift/filetests/isa/intel/binary64.cton index 8b22173fe4..c9febfdab0 100644 --- a/cranelift/filetests/isa/intel/binary64.cton +++ b/cranelift/filetests/isa/intel/binary64.cton @@ -14,6 +14,13 @@ function %I64() { fn0 = function %foo() sig0 = () + ; Use incoming_arg stack slots because they won't be relocated by the frame + ; layout. + ss0 = incoming_arg 8, offset 0 + ss1 = incoming_arg 1024, offset -1024 + ss2 = incoming_arg 1024, offset -2048 + ss3 = incoming_arg 8, offset -2056 + ebb0: ; Integer Constants. @@ -436,6 +443,22 @@ ebb0: ; asm: call *%r10 call_indirect sig0, v402() ; bin: 41 ff d2 + ; Spill / Fill. 
+ + ; asm: movq %rcx, 1032(%rsp) + [-,ss1] v500 = spill v1 ; bin: 48 89 8c 24 00000408 + ; asm: movq %rsi, 1032(%rsp) + [-,ss1] v501 = spill v2 ; bin: 48 89 b4 24 00000408 + ; asm: movq %r10, 1032(%rsp) + [-,ss1] v502 = spill v3 ; bin: 4c 89 94 24 00000408 + + ; asm: movq 1032(%rsp), %rcx + [-,%rcx] v510 = fill v500 ; bin: 48 8b 8c 24 00000408 + ; asm: movq 1032(%rsp), %rsi + [-,%rsi] v511 = fill v501 ; bin: 48 8b b4 24 00000408 + ; asm: movq 1032(%rsp), %r10 + [-,%r10] v512 = fill v502 ; bin: 4c 8b 94 24 00000408 + ; asm: testq %rcx, %rcx ; asm: je ebb1 brz v1, ebb1 ; bin: 48 85 c9 74 1b @@ -477,6 +500,11 @@ function %I32() { fn0 = function %foo() sig0 = () + ss0 = incoming_arg 8, offset 0 + ss1 = incoming_arg 1024, offset -1024 + ss2 = incoming_arg 1024, offset -2048 + ss3 = incoming_arg 8, offset -2056 + ebb0: ; Integer Constants. @@ -806,6 +834,22 @@ ebb0: ; asm: movzbl %dl, %esi [-,%rsi] v351 = bint.i32 v301 ; bin: 0f b6 f2 + ; Spill / Fill. + + ; asm: movl %ecx, 1032(%rsp) + [-,ss1] v500 = spill v1 ; bin: 89 8c 24 00000408 + ; asm: movl %esi, 1032(%rsp) + [-,ss1] v501 = spill v2 ; bin: 89 b4 24 00000408 + ; asm: movl %r10d, 1032(%rsp) + [-,ss1] v502 = spill v3 ; bin: 44 89 94 24 00000408 + + ; asm: movl 1032(%rsp), %ecx + [-,%rcx] v510 = fill v500 ; bin: 8b 8c 24 00000408 + ; asm: movl 1032(%rsp), %esi + [-,%rsi] v511 = fill v501 ; bin: 8b b4 24 00000408 + ; asm: movl 1032(%rsp), %r10d + [-,%r10] v512 = fill v502 ; bin: 44 8b 94 24 00000408 + ; asm: testl %ecx, %ecx ; asm: je ebb1x brz v1, ebb1 ; bin: 85 c9 74 18 diff --git a/lib/cretonne/meta/isa/intel/encodings.py b/lib/cretonne/meta/isa/intel/encodings.py index 117940eb39..4b646658c1 100644 --- a/lib/cretonne/meta/isa/intel/encodings.py +++ b/lib/cretonne/meta/isa/intel/encodings.py @@ -65,7 +65,7 @@ def enc_i32_i64_ld_st(inst, w_bit, recipe, *args, **kwargs): Add encodings for `inst.i32` to I32. Add encodings for `inst.i32` to I64 with and without REX. 
Add encodings for `inst.i64` to I64 with a REX prefix, using the `w_bit` - argument to determine wheter or not to set the REX.W bit. + argument to determine whether or not to set the REX.W bit. """ I32.enc(inst.i32.any, *recipe(*args, **kwargs)) @@ -181,6 +181,8 @@ enc_i32_i64_ld_st(base.store, True, r.st, 0x89) enc_i32_i64_ld_st(base.store, True, r.stDisp8, 0x89) enc_i32_i64_ld_st(base.store, True, r.stDisp32, 0x89) +enc_i32_i64(base.spill, r.spSib32, 0x89) + enc_i64(base.istore32.i64.any, r.st, 0x89) enc_i64(base.istore32.i64.any, r.stDisp8, 0x89) enc_i64(base.istore32.i64.any, r.stDisp32, 0x89) @@ -208,6 +210,8 @@ enc_i32_i64_ld_st(base.load, True, r.ld, 0x8b) enc_i32_i64_ld_st(base.load, True, r.ldDisp8, 0x8b) enc_i32_i64_ld_st(base.load, True, r.ldDisp32, 0x8b) +enc_i32_i64(base.fill, r.fiSib32, 0x8b) + enc_i64(base.uload32.i64, r.ld, 0x8b) enc_i64(base.uload32.i64, r.ldDisp8, 0x8b) enc_i64(base.uload32.i64, r.ldDisp32, 0x8b) @@ -252,6 +256,12 @@ enc_flt(base.store.f64.any, r.fst, 0x66, 0x0f, 0xd6) enc_flt(base.store.f64.any, r.fstDisp8, 0x66, 0x0f, 0xd6) enc_flt(base.store.f64.any, r.fstDisp32, 0x66, 0x0f, 0xd6) +enc_flt(base.fill.f32, r.ffiSib32, 0x66, 0x0f, 0x6e) +enc_flt(base.fill.f64, r.ffiSib32, 0xf3, 0x0f, 0x7e) + +enc_flt(base.spill.f32, r.fspSib32, 0x66, 0x0f, 0x7e) +enc_flt(base.spill.f64, r.fspSib32, 0x66, 0x0f, 0xd6) + # # Function addresses. 
# diff --git a/lib/cretonne/meta/isa/intel/recipes.py b/lib/cretonne/meta/isa/intel/recipes.py index 89556a4049..358f9cae72 100644 --- a/lib/cretonne/meta/isa/intel/recipes.py +++ b/lib/cretonne/meta/isa/intel/recipes.py @@ -8,7 +8,7 @@ from base.formats import Unary, UnaryImm, Binary, BinaryImm, MultiAry from base.formats import Trap, Call, IndirectCall, Store, Load from base.formats import IntCompare from base.formats import RegMove, Ternary, Jump, Branch, FuncAddr -from .registers import GPR, ABCD, FPR, GPR8, FPR8 +from .registers import GPR, ABCD, FPR, GPR8, FPR8, StackGPR32, StackFPR32 try: from typing import Tuple, Dict, Sequence # noqa @@ -474,6 +474,26 @@ fstDisp32 = TailRecipe( sink.put4(offset as u32); ''') +# Unary spill with SIB and 32-bit displacement. +spSib32 = TailRecipe( + 'spSib32', Unary, size=6, ins=GPR, outs=StackGPR32, + emit=''' + let base = stk_base(out_stk0.base); + PUT_OP(bits, rex2(base, in_reg0), sink); + modrm_sib_disp32(in_reg0, sink); + sib_noindex(base, sink); + sink.put4(out_stk0.offset as u32); + ''') +fspSib32 = TailRecipe( + 'fspSib32', Unary, size=6, ins=FPR, outs=StackFPR32, + emit=''' + let base = stk_base(out_stk0.base); + PUT_OP(bits, rex2(base, in_reg0), sink); + modrm_sib_disp32(in_reg0, sink); + sib_noindex(base, sink); + sink.put4(out_stk0.offset as u32); + ''') + # # Load recipes # @@ -540,6 +560,26 @@ fldDisp32 = TailRecipe( sink.put4(offset as u32); ''') +# Unary fill with SIB and 32-bit displacement. 
+fiSib32 = TailRecipe( + 'fiSib32', Unary, size=6, ins=StackGPR32, outs=GPR, + emit=''' + let base = stk_base(in_stk0.base); + PUT_OP(bits, rex2(base, out_reg0), sink); + modrm_sib_disp32(out_reg0, sink); + sib_noindex(base, sink); + sink.put4(in_stk0.offset as u32); + ''') +ffiSib32 = TailRecipe( + 'ffiSib32', Unary, size=6, ins=StackFPR32, outs=FPR, + emit=''' + let base = stk_base(in_stk0.base); + PUT_OP(bits, rex2(base, out_reg0), sink); + modrm_sib_disp32(out_reg0, sink); + sib_noindex(base, sink); + sink.put4(in_stk0.offset as u32); + ''') + # # Call/return # diff --git a/lib/cretonne/meta/isa/intel/registers.py b/lib/cretonne/meta/isa/intel/registers.py index 62966aac3b..886812d6ce 100644 --- a/lib/cretonne/meta/isa/intel/registers.py +++ b/lib/cretonne/meta/isa/intel/registers.py @@ -23,7 +23,7 @@ data types, and the H-registers even less so. Rather than trying to model the H-registers accurately, we'll avoid using them in both I32 and I64 modes. """ from __future__ import absolute_import -from cdsl.registers import RegBank, RegClass +from cdsl.registers import RegBank, RegClass, Stack from .defs import ISA @@ -44,4 +44,10 @@ ABCD = GPR[0:4] FPR = RegClass(FloatRegs) FPR8 = FPR[0:8] +# Constraints for stack operands. + +# Stack operand with a 32-bit signed displacement from either RBP or RSP. 
+StackGPR32 = Stack(GPR) +StackFPR32 = Stack(FPR) + RegClass.extract_names(globals()) diff --git a/lib/cretonne/src/isa/intel/binemit.rs b/lib/cretonne/src/isa/intel/binemit.rs index 6f75833be6..888128caed 100644 --- a/lib/cretonne/src/isa/intel/binemit.rs +++ b/lib/cretonne/src/isa/intel/binemit.rs @@ -2,8 +2,9 @@ use binemit::{CodeSink, Reloc, bad_encoding}; use ir::{Function, Inst, Ebb, InstructionData}; -use isa::RegUnit; +use isa::{RegUnit, StackRef, StackBase, StackBaseMask}; use regalloc::RegDiversions; +use super::registers::RU; include!(concat!(env!("OUT_DIR"), "/binemit-intel.rs")); @@ -27,6 +28,16 @@ impl Into<Reloc> for RelocKind { } } +// Convert a stack base to the corresponding register. +fn stk_base(base: StackBase) -> RegUnit { + let ru = match base { + StackBase::SP => RU::rsp, + StackBase::FP => RU::rbp, + StackBase::Zone => unimplemented!(), + }; + ru as RegUnit +} + // Mandatory prefix bytes for Mp* opcodes. const PREFIX: [u8; 3] = [0x66, 0xf3, 0xf2]; @@ -43,7 +54,7 @@ fn rex1(reg_b: RegUnit) -> u8 { // Create a dual-register REX prefix, setting: // -// REX.B = bit 3 of r/m register. +// REX.B = bit 3 of r/m register, or SIB base register when a SIB byte is present. // REX.R = bit 3 of reg register. fn rex2(rm: RegUnit, reg: RegUnit) -> u8 { let b = ((rm >> 3) & 1) as u8; @@ -185,6 +196,20 @@ fn modrm_disp32<CS: CodeSink + ?Sized>(rm: RegUnit, reg: RegUnit, sink: &mut CS) sink.put1(b); } +/// Emit a mode 10 ModR/M byte indicating that a SIB byte is present. +fn modrm_sib_disp32<CS: CodeSink + ?Sized>(reg: RegUnit, sink: &mut CS) { + modrm_disp32(0b100, reg, sink); +} + +/// Emit a SIB byte with a base register and no scale+index. +fn sib_noindex<CS: CodeSink + ?Sized>(base: RegUnit, sink: &mut CS) { + let base = base as u8 & 7; + // SIB SS_III_BBB. + let mut b = 0b00_100_000; + b |= base; + sink.put1(b); +} + /// Emit a single-byte branch displacement to `destination`. fn disp1<CS: CodeSink + ?Sized>(destination: Ebb, func: &Function, sink: &mut CS) { let delta = func.offsets[destination].wrapping_sub(sink.offset() + 1);